Adi362 committed on
Commit
306e869
·
verified ·
1 Parent(s): 6c5e2ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +121 -29
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import os
2
  import requests
 
3
  from fastapi import FastAPI
4
  from pydantic import BaseModel
5
  from typing import Optional
@@ -8,17 +9,43 @@ from fastembed import TextEmbedding
8
 
9
  app = FastAPI()
10
 
 
11
  QDRANT_URL = os.environ["QDRANT_URL"].rstrip("/")
12
  QDRANT_API_KEY = os.environ["QDRANT_API_KEY"]
13
  COLLECTION = "well_vectors"
14
 
15
- llm = Llama(
16
- model_path="/app/model.gguf",
17
- n_ctx=4096,
18
- n_threads=2,
19
- n_batch=128,
20
- )
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  embedder = TextEmbedding(
23
  model_name="BAAI/bge-large-en-v1.5",
24
  )
@@ -30,11 +57,11 @@ class QueryRequest(BaseModel):
30
 
31
  @app.get("/")
32
  def root():
33
- return {"status": "edyx-phy running"}
34
 
35
- @app.post("/v1/query")
36
- def query(req: QueryRequest):
37
- vector = [float(x) for x in next(embedder.embed(req.question))]
38
 
39
  r = requests.post(
40
  f"{QDRANT_URL}/collections/{COLLECTION}/points/search",
@@ -44,20 +71,17 @@ def query(req: QueryRequest):
44
  },
45
  json={
46
  "vector": vector,
47
- "limit": req.top_k,
48
  "with_payload": True,
49
  },
50
  timeout=30,
51
  )
52
 
53
  if r.status_code != 200:
54
- return {"error": "Qdrant search failed", "details": r.text}
55
 
56
  hits = r.json().get("result", [])
57
-
58
- if not hits:
59
- return {"answer": "No relevant scientific data found."}
60
-
61
  collected = []
62
  for h in hits:
63
  payload = h.get("payload", {})
@@ -67,17 +91,57 @@ def query(req: QueryRequest):
67
  collected.append(str(payload["text"]))
68
 
69
  context = "\n\n".join(collected)[:12000]
 
 
 
 
 
 
 
 
 
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  prompt = f"""
72
  You are an expert physics researcher and teacher.
73
-
74
  You are given raw, fragmented scientific material retrieved from a large physics knowledge base.
75
  This material may include:
76
  - incomplete sentences
77
  - dataset paths or filenames
78
  - low-level implementation details
79
  - broken or partial explanations
80
-
81
  Your job:
82
  - Use the retrieved material as grounding evidence
83
  - Ignore irrelevant technical artifacts (paths, array shapes, file names)
@@ -85,33 +149,61 @@ Your job:
85
  - Do NOT invent specific papers, experiments, or citations
86
  - Do NOT mention datasets, storage paths, or indexing systems
87
  - Produce a clean, coherent, human-readable explanation
88
-
89
  Style rules:
90
  - Clear, structured explanation
91
  - Intuitive where possible
92
  - Graduate-level physics understanding
93
  - Text-first (formulas only if they genuinely help)
94
  - No raw fragments, no broken sentences
95
-
96
  CONTEXT (retrieved evidence):
97
  {context}
98
-
99
  QUESTION:
100
- {req.question}
101
-
102
  Now produce a high-quality physics explanation that a serious learner would trust.
103
  """
104
 
105
-
106
  out = llm(
107
  prompt,
108
- max_tokens=req.max_tokens,
109
  temperature=0.2,
110
  top_p=0.9,
111
  stop=["SOURCE:", "QUESTION:"],
112
  )
 
 
113
 
114
- return {
115
- "answer": out["choices"][0]["text"].strip(),
116
- "sources_used": len(hits),
117
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import asyncio
import os
from typing import Optional

import httpx
import requests
from fastapi import FastAPI
from pydantic import BaseModel
 
9
 
10
  app = FastAPI()
11
 
12
+ # Qdrant Configuration (unchanged)
13
  QDRANT_URL = os.environ["QDRANT_URL"].rstrip("/")
14
  QDRANT_API_KEY = os.environ["QDRANT_API_KEY"]
15
  COLLECTION = "well_vectors"
16
 
17
+ # Groq API Configuration
18
+ GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
19
+ GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
20
+ GROQ_MODEL = "llama-3.3-70b-versatile" # Best for scientific reasoning
 
 
21
 
22
+ # Physics system prompt for Groq
23
+ PHYSICS_SYSTEM_PROMPT = """You are an expert physics researcher and teacher.
24
+ You are given retrieved scientific material from a physics knowledge base.
25
+ Your job:
26
+ - Use the retrieved material as grounding evidence
27
+ - Ignore irrelevant technical artifacts (paths, array shapes, file names)
28
+ - If information is incomplete, use your physics knowledge to complete the explanation
29
+ - Do NOT invent specific papers, experiments, or citations
30
+ - Produce a clean, coherent, human-readable explanation
31
+ Style: Clear, structured, graduate-level physics understanding."""
32
+
33
+ # Local fallback model (only loaded when needed)
34
+ local_llm = None
35
+
36
+ def get_local_llm():
37
+ global local_llm
38
+ if local_llm is None:
39
+ print("Loading local fallback model...")
40
+ local_llm = Llama(
41
+ model_path="/app/model.gguf",
42
+ n_ctx=4096,
43
+ n_threads=2,
44
+ n_batch=128,
45
+ )
46
+ return local_llm
47
+
48
+ # Embedder (always needed for RAG search)
49
  embedder = TextEmbedding(
50
  model_name="BAAI/bge-large-en-v1.5",
51
  )
 
57
 
58
  @app.get("/")
59
  def root():
60
+ return {"status": "edyx-phy running", "mode": "groq-primary"}
61
 
62
+ def search_qdrant(question: str, top_k: int):
63
+ """Search Qdrant for relevant physics context"""
64
+ vector = [float(x) for x in next(embedder.embed(question))]
65
 
66
  r = requests.post(
67
  f"{QDRANT_URL}/collections/{COLLECTION}/points/search",
 
71
  },
72
  json={
73
  "vector": vector,
74
+ "limit": top_k,
75
  "with_payload": True,
76
  },
77
  timeout=30,
78
  )
79
 
80
  if r.status_code != 200:
81
+ return None, f"Qdrant search failed: {r.text}"
82
 
83
  hits = r.json().get("result", [])
84
+
 
 
 
85
  collected = []
86
  for h in hits:
87
  payload = h.get("payload", {})
 
91
  collected.append(str(payload["text"]))
92
 
93
  context = "\n\n".join(collected)[:12000]
94
+ return context, len(hits)
95
+
96
+ async def call_groq_api(question: str, context: str, max_tokens: int):
97
+ """Try to get response from Groq API"""
98
+ if not GROQ_API_KEY:
99
+ raise Exception("GROQ_API_KEY not configured")
100
+
101
+ user_prompt = f"""CONTEXT (retrieved evidence):
102
+ {context}
103
 
104
+ QUESTION:
105
+ {question}
106
+
107
+ Now produce a high-quality physics explanation that a serious learner would trust."""
108
+
109
+ async with httpx.AsyncClient(timeout=60.0) as client:
110
+ response = await client.post(
111
+ GROQ_API_URL,
112
+ headers={
113
+ "Content-Type": "application/json",
114
+ "Authorization": f"Bearer {GROQ_API_KEY}"
115
+ },
116
+ json={
117
+ "model": GROQ_MODEL,
118
+ "messages": [
119
+ {"role": "system", "content": PHYSICS_SYSTEM_PROMPT},
120
+ {"role": "user", "content": user_prompt}
121
+ ],
122
+ "max_tokens": max_tokens,
123
+ "temperature": 0.2
124
+ }
125
+ )
126
+
127
+ if response.status_code != 200:
128
+ raise Exception(f"Groq API error: {response.status_code} - {response.text}")
129
+
130
+ data = response.json()
131
+ return data["choices"][0]["message"]["content"]
132
+
133
+ def call_local_model(question: str, context: str, max_tokens: int):
134
+ """Fallback to local llama model - YOUR ORIGINAL LOGIC"""
135
+ llm = get_local_llm()
136
+
137
  prompt = f"""
138
  You are an expert physics researcher and teacher.
 
139
  You are given raw, fragmented scientific material retrieved from a large physics knowledge base.
140
  This material may include:
141
  - incomplete sentences
142
  - dataset paths or filenames
143
  - low-level implementation details
144
  - broken or partial explanations
 
145
  Your job:
146
  - Use the retrieved material as grounding evidence
147
  - Ignore irrelevant technical artifacts (paths, array shapes, file names)
 
149
  - Do NOT invent specific papers, experiments, or citations
150
  - Do NOT mention datasets, storage paths, or indexing systems
151
  - Produce a clean, coherent, human-readable explanation
 
152
  Style rules:
153
  - Clear, structured explanation
154
  - Intuitive where possible
155
  - Graduate-level physics understanding
156
  - Text-first (formulas only if they genuinely help)
157
  - No raw fragments, no broken sentences
 
158
  CONTEXT (retrieved evidence):
159
  {context}
 
160
  QUESTION:
161
+ {question}
 
162
  Now produce a high-quality physics explanation that a serious learner would trust.
163
  """
164
 
 
165
  out = llm(
166
  prompt,
167
+ max_tokens=max_tokens,
168
  temperature=0.2,
169
  top_p=0.9,
170
  stop=["SOURCE:", "QUESTION:"],
171
  )
172
+
173
+ return out["choices"][0]["text"].strip()
174
 
175
+ @app.post("/v1/query")
176
+ async def query(req: QueryRequest):
177
+
178
+ context, sources = search_qdrant(req.question, req.top_k)
179
+
180
+ if context is None:
181
+ return {"error": "Qdrant search failed", "details": sources}
182
+
183
+ if not context:
184
+ return {"answer": "No relevant scientific data found.", "sources_used": 0}
185
+
186
+ try:
187
+ answer = await call_groq_api(req.question, context, req.max_tokens)
188
+ return {
189
+ "answer": answer,
190
+ "sources_used": sources,
191
+ "source": "primary"
192
+ }
193
+ except Exception as e:
194
+ print(f"Groq API failed: {e}, falling back to local model...")
195
+
196
+
197
+ try:
198
+ answer = call_local_model(req.question, context, req.max_tokens)
199
+ return {
200
+ "answer": answer,
201
+ "sources_used": sources,
202
+ "source": "fallback"
203
+ }
204
+ except Exception as e:
205
+ return {
206
+ "answer": f"Error: Both primary and fallback failed. {str(e)}",
207
+ "sources_used": 0,
208
+ "source": "error"
209
+ }