Spaces:

responsible-prompting
/

demo-multiturn

Sleeping

App Files Files Community

ashwath-vaithina-ibm committed on Jun 23, 2025

Commit

aefc33c

verified ·

1 Parent(s): d8689f7

Upload inference.py

Browse files

Files changed (1) hide show

helpers/inference.py +66 -0

helpers/inference.py ADDED Viewed

	@@ -0,0 +1,66 @@

+from helpers import get_credentials
+import requests
+def hf_inference(prompt, model_id, temperature, max_new_tokens):
+    hf_token, _ = get_credentials.get_hf_credentials()
+    API_URL = "https://router.huggingface.co/together/v1/chat/completions"
+    headers = {
+        "Authorization": f"Bearer {hf_token}",
+    }
+    response = requests.post(
+        API_URL,
+        headers=headers,
+        json={
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": prompt
+                        },
+                    ]
+                }
+            ],
+            "model": model_id,
+            'temperature': temperature,
+            'max_new_tokens': max_new_tokens,
+        }
+    )
+    return response.json()["choices"][0]["message"]
+def replicate_inference(prompt, model_id, temperature, max_new_tokens):
+    repl_token = get_credentials.get_replicate_credentials()
+    API_URL = f"https://api.replicate.com/v1/models/{model_id}/predictions"
+    headers = {
+        "Authorization": f"Bearer {repl_token}",
+        "Content-Type": "application/json",
+        "Prefer": "wait"
+    }
+    response = requests.post(
+        API_URL,
+        headers=headers,
+        json={
+            "input": {
+                "prompt": prompt,
+                "temperature": temperature,
+                "max_tokens": max_new_tokens,
+            }
+        }
+    )
+    return {
+        "content": "".join(response.json()['output'])
+    }
+INFERENCE_HANDLER = {
+    'huggingface': hf_inference,
+    'replicate': replicate_inference
+}