ishmeet-yo committed on
Commit
793d986
·
verified ·
1 Parent(s): 91e1530

Update app/llm.py

Browse files
Files changed (1) hide show
  1. app/llm.py +82 -48
app/llm.py CHANGED
@@ -1,54 +1,88 @@
1
  import requests
2
  import os
3
 
4
- HF_TOKEN = os.getenv("HF_TOKEN")
5
- if not HF_TOKEN:
6
- raise RuntimeError("HF_TOKEN not found in environment variables")
7
-
8
  API_URL = "https://router.huggingface.co/v1/chat/completions"
9
 
 
 
 
 
 
 
 
 
 
 
 
10
  def generate_answer(context: str, query: str) -> str:
11
- headers = {
12
- "Authorization": f"Bearer {HF_TOKEN}",
13
- "Content-Type": "application/json",
14
- }
15
-
16
- payload = {
17
- "model": "deepseek-ai/DeepSeek-V3.2",
18
- "messages": [
19
- {
20
- "role": "system",
21
- "content": (
22
- """You are a Harry Potter knowledge assistant.
23
-
24
- Answer the question concisely in 2–3 sentences.
25
- Do not use bullet points.
26
- Do not add extra background.
27
- Be clear and direct."""
28
- ),
29
- },
30
- {
31
- "role": "user",
32
- "content": f"""Context:
33
- {context}
34
-
35
- Question:
36
- {query}
37
-
38
- Answer:
39
- """,
40
- },
41
- ],
42
- "temperature": 0.3,
43
- "max_tokens": 500,
44
- }
45
-
46
- response = requests.post(API_URL, headers=headers, json=payload)
47
-
48
- if response.status_code != 200:
49
- return (
50
- "The magic is unstable right now.\n\n"
51
- f"Model response: {response.status_code}\n\n"
52
- "Try again in a moment."
53
- )
54
- return response.json()["choices"][0]["message"]["content"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
import random

import requests
4
# OpenAI-compatible chat-completions endpoint exposed by the Hugging Face router.
  API_URL = "https://router.huggingface.co/v1/chat/completions"
5

6
def load_tokens():
    """Collect Hugging Face API tokens from the environment.

    Accepts any number of ``HF_TOKEN_*`` variables, and also the legacy
    single ``HF_TOKEN`` variable the previous version of this module used,
    so existing deployments keep working after the multi-token change.

    Returns:
        list[str]: a non-empty list of token strings, in deterministic
        (sorted-by-variable-name) order.

    Raises:
        RuntimeError: if no token is configured in the environment.
    """
    # sorted(): os.environ iteration order is process-dependent; sorting makes
    # the pool order (and anything derived from it) reproducible.
    tokens = [
        value
        for key, value in sorted(os.environ.items())
        if value and (key == "HF_TOKEN" or key.startswith("HF_TOKEN_"))
    ]
    if not tokens:
        raise RuntimeError("No HF tokens found in environment variables")
    return tokens
14
+
15
+ # Token pool resolved once at import time; raises RuntimeError if none configured.
+ HF_TOKENS = load_tokens()
16
+
17
def generate_answer(context: str, query: str) -> str:
    """Answer *query* using *context* via the HF router chat-completions API.

    Tries each configured token in random order to spread load across the
    pool: a 429 (rate-limited) response moves on to the next token, while
    any other HTTP error or a network failure aborts the retry loop.

    Args:
        context: Retrieved passage text to ground the answer in.
        query: The user's question.

    Returns:
        The model's answer text, or a friendly fallback message when every
        attempt failed.
    """
    # random.sample of the full length = a shuffled COPY. The previous code
    # called random.shuffle(HF_TOKENS), which both crashed (random was never
    # imported) and mutated the shared module-level list in place.
    for token in random.sample(HF_TOKENS, len(HF_TOKENS)):
        headers = {
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": "deepseek-ai/DeepSeek-V3.2",
            "messages": [
                {"role": "system", "content": "You are a Harry Potter knowledge assistant."},
                {"role": "user", "content": f"Context:\n{context}\nQuestion:\n{query}\nAnswer:"},
            ],
            "temperature": 0.3,
            "max_tokens": 500,
        }

        try:
            # timeout: never let a web request hang forever on an unresponsive upstream.
            response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
        except requests.RequestException:
            # Network-level failure (DNS, connection, timeout) — token-agnostic,
            # so retrying other tokens is unlikely to help.
            break

        if response.status_code == 200:
            return response.json()["choices"][0]["message"]["content"]

        if response.status_code != 429:
            break  # real error, don't retry blindly

    return "All magical channels are busy right now. Please try again shortly."
43
+
44
+ # def generate_answer(context: str, query: str) -> str:
45
+ # headers = {
46
+ # "Authorization": f"Bearer {HF_TOKEN}",
47
+ # "Content-Type": "application/json",
48
+ # }
49
+
50
+ # payload = {
51
+ # "model": "deepseek-ai/DeepSeek-V3.2",
52
+ # "messages": [
53
+ # {
54
+ # "role": "system",
55
+ # "content": (
56
+ # """You are a Harry Potter knowledge assistant.
57
+
58
+ # Answer the question concisely in 2–3 sentences.
59
+ # Do not use bullet points.
60
+ # Do not add extra background.
61
+ # Be clear and direct."""
62
+ # ),
63
+ # },
64
+ # {
65
+ # "role": "user",
66
+ # "content": f"""Context:
67
+ # {context}
68
+
69
+ # Question:
70
+ # {query}
71
+
72
+ # Answer:
73
+ # """,
74
+ # },
75
+ # ],
76
+ # "temperature": 0.3,
77
+ # "max_tokens": 500,
78
+ # }
79
+
80
+ # response = requests.post(API_URL, headers=headers, json=payload)
81
+
82
+ # if response.status_code != 200:
83
+ # return (
84
+ # "The magic is unstable right now.\n\n"
85
+ # f"Model response: {response.status_code}\n\n"
86
+ # "Try again in a moment."
87
+ # )
88
+ # return response.json()["choices"][0]["message"]["content"]