ishmeet-yo committed on
Commit
793d986
·
verified ·
1 Parent(s): 91e1530

Update app/llm.py

Browse files
Files changed (1) hide show
  1. app/llm.py +82 -48
app/llm.py CHANGED
@@ -1,54 +1,88 @@
1
  import requests
2
  import os
3
 
4
- HF_TOKEN = os.getenv("HF_TOKEN")
5
- if not HF_TOKEN:
6
- raise RuntimeError("HF_TOKEN not found in environment variables")
7
-
8
  API_URL = "https://router.huggingface.co/v1/chat/completions"
9
 
 
 
 
 
 
 
 
 
 
 
 
10
  def generate_answer(context: str, query: str) -> str:
11
- headers = {
12
- "Authorization": f"Bearer {HF_TOKEN}",
13
- "Content-Type": "application/json",
14
- }
15
-
16
- payload = {
17
- "model": "deepseek-ai/DeepSeek-V3.2",
18
- "messages": [
19
- {
20
- "role": "system",
21
- "content": (
22
- """You are a Harry Potter knowledge assistant.
23
-
24
- Answer the question concisely in 2–3 sentences.
25
- Do not use bullet points.
26
- Do not add extra background.
27
- Be clear and direct."""
28
- ),
29
- },
30
- {
31
- "role": "user",
32
- "content": f"""Context:
33
- {context}
34
-
35
- Question:
36
- {query}
37
-
38
- Answer:
39
- """,
40
- },
41
- ],
42
- "temperature": 0.3,
43
- "max_tokens": 500,
44
- }
45
-
46
- response = requests.post(API_URL, headers=headers, json=payload)
47
-
48
- if response.status_code != 200:
49
- return (
50
- "The magic is unstable right now.\n\n"
51
- f"Model response: {response.status_code}\n\n"
52
- "Try again in a moment."
53
- )
54
- return response.json()["choices"][0]["message"]["content"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
import random

import requests
4
# OpenAI-compatible chat-completions endpoint exposed by the Hugging Face router.
  API_URL = "https://router.huggingface.co/v1/chat/completions"
5

6
def load_tokens():
    """Collect Hugging Face API tokens from the environment.

    Accepts any number of ``HF_TOKEN_*`` variables, and also the legacy
    single ``HF_TOKEN`` variable the previous version of this module used,
    so existing deployments keep working after the multi-token change.

    Returns:
        list[str]: a non-empty list of token strings, in deterministic
        (sorted-by-variable-name) order.

    Raises:
        RuntimeError: if no token is configured in the environment.
    """
    # sorted(): os.environ iteration order is process-dependent; sorting makes
    # the pool order (and anything derived from it) reproducible.
    tokens = [
        value
        for key, value in sorted(os.environ.items())
        if value and (key == "HF_TOKEN" or key.startswith("HF_TOKEN_"))
    ]
    if not tokens:
        raise RuntimeError("No HF tokens found in environment variables")
    return tokens
14
+
15
+ # Token pool resolved once at import time; raises RuntimeError if none configured.
+ HF_TOKENS = load_tokens()
16
+
17
def generate_answer(context: str, query: str) -> str:
    """Answer *query* using *context* via the HF router chat-completions API.

    Tries each configured token in random order to spread load across the
    pool: a 429 (rate-limited) response moves on to the next token, while
    any other HTTP error or a network failure aborts the retry loop.

    Args:
        context: Retrieved passage text to ground the answer in.
        query: The user's question.

    Returns:
        The model's answer text, or a friendly fallback message when every
        attempt failed.
    """
    # random.sample of the full length = a shuffled COPY. The previous code
    # called random.shuffle(HF_TOKENS), which both crashed (random was never
    # imported) and mutated the shared module-level list in place.
    for token in random.sample(HF_TOKENS, len(HF_TOKENS)):
        headers = {
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": "deepseek-ai/DeepSeek-V3.2",
            "messages": [
                {"role": "system", "content": "You are a Harry Potter knowledge assistant."},
                {"role": "user", "content": f"Context:\n{context}\nQuestion:\n{query}\nAnswer:"},
            ],
            "temperature": 0.3,
            "max_tokens": 500,
        }

        try:
            # timeout: never let a web request hang forever on an unresponsive upstream.
            response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
        except requests.RequestException:
            # Network-level failure (DNS, connection, timeout) — token-agnostic,
            # so retrying other tokens is unlikely to help.
            break

        if response.status_code == 200:
            return response.json()["choices"][0]["message"]["content"]

        if response.status_code != 429:
            break  # real error, don't retry blindly

    return "All magical channels are busy right now. Please try again shortly."
43
+
44
+ # def generate_answer(context: str, query: str) -> str:
45
+ # headers = {
46
+ # "Authorization": f"Bearer {HF_TOKEN}",
47
+ # "Content-Type": "application/json",
48
+ # }
49
+
50
+ # payload = {
51
+ # "model": "deepseek-ai/DeepSeek-V3.2",
52
+ # "messages": [
53
+ # {
54
+ # "role": "system",
55
+ # "content": (
56
+ # """You are a Harry Potter knowledge assistant.
57
+
58
+ # Answer the question concisely in 2–3 sentences.
59
+ # Do not use bullet points.
60
+ # Do not add extra background.
61
+ # Be clear and direct."""
62
+ # ),
63
+ # },
64
+ # {
65
+ # "role": "user",
66
+ # "content": f"""Context:
67
+ # {context}
68
+
69
+ # Question:
70
+ # {query}
71
+
72
+ # Answer:
73
+ # """,
74
+ # },
75
+ # ],
76
+ # "temperature": 0.3,
77
+ # "max_tokens": 500,
78
+ # }
79
+
80
+ # response = requests.post(API_URL, headers=headers, json=payload)
81
+
82
+ # if response.status_code != 200:
83
+ # return (
84
+ # "The magic is unstable right now.\n\n"
85
+ # f"Model response: {response.status_code}\n\n"
86
+ # "Try again in a moment."
87
+ # )
88
+ # return response.json()["choices"][0]["message"]["content"]