Spaces:

alexkueck
/

TestInferenceAPI

Sleeping

App Files Community

alexkueck committed on Apr 15, 2024

Commit

5ffd14f

verified ·

1 Parent(s): 453eb35

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -3

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import requests
-from huggingface_hub import InferenceClient, login
 from transformers import AutoTokenizer
 from langchain.chat_models import ChatOpenAI
 import os, sys, json
@@ -85,6 +85,32 @@ print ("Inf.Client")
 #API_URL = "https://api-inference.huggingface.co/models/argilla/notux-8x7b-v1"
 HEADERS = {"Authorization": f"Bearer {HUGGINGFACEHUB_API_TOKEN}"}
 ##############################################
 # tokenizer for generating prompt
 ##############################################
@@ -315,7 +341,7 @@ def generate(text, history, rag_option, model_option,  k=3, top_p=0.6, temperatu
         try:
             if (model_option == "HF1"):
                 #Anfrage an InferenceEndpoint1 ----------------------------
-                API_URL = "https://ih7lj8onsvp1wbh0.us-east-1.aws.endpoints.huggingface.cloud" #"https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
                 print("HF1")
             else:
                 API_URL = "https://api-inference.huggingface.co/models/tiiuae/falcon-180B-chat"
@@ -350,7 +376,7 @@ def generate(text, history, rag_option, model_option,  k=3, top_p=0.6, temperatu
                 "inputs": prompt,
                 "options": {"max_new_tokens": max_new_tokens},
             }
-            response= requests.post(API_URL, headers=HEADERS, json=data)
             if response != None:
                 result = response.json()
                 print("result:------------------")

 import requests
+from huggingface_hub import InferenceClient, login, create_inference_endpoint
 from transformers import AutoTokenizer
 from langchain.chat_models import ChatOpenAI
 import os, sys, json
 #API_URL = "https://api-inference.huggingface.co/models/argilla/notux-8x7b-v1"
 HEADERS = {"Authorization": f"Bearer {HUGGINGFACEHUB_API_TOKEN}"}
+##############################################
+#Inference Endpoint
+##############################################
+endpoint = create_inference_endpoint(
+    "smaug-72b-v0-1-bmw",
+    repository="abacusai/Smaug-72B-v0.1",
+    framework="pytorch",
+    task="text-generation",
+    accelerator="gpu",
+    vendor="aws",
+    region="us-east-1",
+    type="protected",
+    instance_size="medium",
+    instance_type="g5.2xlarge",
+    custom_image={
+        "health_route": "/health",
+        "env": {
+            "MAX_BATCH_PREFILL_TOKENS": "2048",
+            "MAX_INPUT_LENGTH": "1024",
+            "MAX_TOTAL_TOKENS": "1512",
+            "MODEL_ID": "/repository"
+        },
+        "url": "https://ih7lj8onsvp1wbh0.us-east-1.aws.endpoints.huggingface.cloud",
+    },
+)
 ##############################################
 # tokenizer for generating prompt
 ##############################################
         try:
             if (model_option == "HF1"):
                 #Anfrage an InferenceEndpoint1 ----------------------------
+                API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
                 print("HF1")
             else:
                 API_URL = "https://api-inference.huggingface.co/models/tiiuae/falcon-180B-chat"
                 "inputs": prompt,
                 "options": {"max_new_tokens": max_new_tokens},
             }
+            response= endpoint.client.text_generation(prompt)   #requests.post(API_URL, headers=HEADERS, json=data)
             if response != None:
                 result = response.json()
                 print("result:------------------")