Adi362 committed on
Commit
b348ed1
·
verified ·
1 Parent(s): 6de4622

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -26
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import os
2
  import requests
3
  import httpx
4
- from fastapi import FastAPI
5
  from pydantic import BaseModel
6
  from typing import Optional
7
  from llama_cpp import Llama
@@ -9,17 +9,16 @@ from fastembed import TextEmbedding
9
 
10
  app = FastAPI()
11
 
12
- # Qdrant Configuration (unchanged)
13
  QDRANT_URL = os.environ["QDRANT_URL"].rstrip("/")
14
  QDRANT_API_KEY = os.environ["QDRANT_API_KEY"]
15
  COLLECTION = "well_vectors"
16
 
17
- # Groq API Configuration
18
- GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
19
- GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
20
- GROQ_MODEL = "llama-3.3-70b-versatile" # Best for scientific reasoning
 
21
 
22
- # Physics system prompt for Groq
23
  PHYSICS_SYSTEM_PROMPT = """You are an expert physics researcher and teacher.
24
  You are given retrieved scientific material from a physics knowledge base.
25
  Your job:
@@ -30,7 +29,6 @@ Your job:
30
  - Produce a clean, coherent, human-readable explanation
31
  Style: Clear, structured, graduate-level physics understanding."""
32
 
33
- # Local fallback model (only loaded when needed)
34
  local_llm = None
35
 
36
  def get_local_llm():
@@ -45,7 +43,6 @@ def get_local_llm():
45
  )
46
  return local_llm
47
 
48
- # Embedder (always needed for RAG search)
49
  embedder = TextEmbedding(
50
  model_name="BAAI/bge-large-en-v1.5",
51
  )
@@ -55,12 +52,16 @@ class QueryRequest(BaseModel):
55
  top_k: Optional[int] = 5
56
  max_tokens: Optional[int] = 512
57
 
 
 
 
 
 
58
@app.get("/")
def root():
    """Liveness endpoint: report that the service is up and which mode it runs in."""
    status_payload = {"status": "edyx-phy running", "mode": "groq-primary"}
    return status_payload
61
 
62
  def search_qdrant(question: str, top_k: int):
63
- """Search Qdrant for relevant physics context"""
64
  vector = [float(x) for x in next(embedder.embed(question))]
65
 
66
  r = requests.post(
@@ -93,28 +94,25 @@ def search_qdrant(question: str, top_k: int):
93
  context = "\n\n".join(collected)[:12000]
94
  return context, len(hits)
95
 
96
- async def call_groq_api(question: str, context: str, max_tokens: int):
97
- """Try to get response from Groq API"""
98
- if not GROQ_API_KEY:
99
- raise Exception("GROQ_API_KEY not configured")
100
 
101
  user_prompt = f"""CONTEXT (retrieved evidence):
102
  {context}
103
-
104
  QUESTION:
105
  {question}
106
-
107
  Now produce a high-quality physics explanation that a serious learner would trust."""
108
 
109
  async with httpx.AsyncClient(timeout=60.0) as client:
110
  response = await client.post(
111
- GROQ_API_URL,
112
  headers={
113
  "Content-Type": "application/json",
114
- "Authorization": f"Bearer {GROQ_API_KEY}"
115
  },
116
  json={
117
- "model": GROQ_MODEL,
118
  "messages": [
119
  {"role": "system", "content": PHYSICS_SYSTEM_PROMPT},
120
  {"role": "user", "content": user_prompt}
@@ -125,13 +123,12 @@ Now produce a high-quality physics explanation that a serious learner would trus
125
  )
126
 
127
  if response.status_code != 200:
128
- raise Exception(f"Groq API error: {response.status_code} - {response.text}")
129
 
130
  data = response.json()
131
  return data["choices"][0]["message"]["content"]
132
 
133
  def call_local_model(question: str, context: str, max_tokens: int):
134
- """Fallback to local llama model - YOUR ORIGINAL LOGIC"""
135
  llm = get_local_llm()
136
 
137
  prompt = f"""
@@ -145,7 +142,7 @@ This material may include:
145
  Your job:
146
  - Use the retrieved material as grounding evidence
147
  - Ignore irrelevant technical artifacts (paths, array shapes, file names)
148
- - If the retrieved information is incomplete or fragmented, use your general physics knowledge to complete the explanation
149
  - Do NOT invent specific papers, experiments, or citations
150
  - Do NOT mention datasets, storage paths, or indexing systems
151
  - Produce a clean, coherent, human-readable explanation
@@ -172,7 +169,7 @@ Now produce a high-quality physics explanation that a serious learner would trus
172
 
173
  return out["choices"][0]["text"].strip()
174
 
175
- @app.post("/v1/query")
176
  async def query(req: QueryRequest):
177
 
178
  context, sources = search_qdrant(req.question, req.top_k)
@@ -184,14 +181,14 @@ async def query(req: QueryRequest):
184
  return {"answer": "No relevant scientific data found.", "sources_used": 0}
185
 
186
  try:
187
- answer = await call_groq_api(req.question, context, req.max_tokens)
188
  return {
189
  "answer": answer,
190
  "sources_used": sources,
191
  "source": "primary"
192
  }
193
  except Exception as e:
194
- print(f"Groq API failed: {e}, falling back to local model...")
195
 
196
 
197
  try:
 
1
  import os
2
  import requests
3
  import httpx
4
+ from fastapi import FastAPI, HTTPException, Security, Header
5
  from pydantic import BaseModel
6
  from typing import Optional
7
  from llama_cpp import Llama
 
9
 
10
  app = FastAPI()
11
 
 
12
# Qdrant vector-store connection settings (both env vars are required; a
# missing one raises KeyError at import time).
QDRANT_URL = os.environ["QDRANT_URL"].rstrip("/")  # trailing slash removed so request paths can be appended safely
QDRANT_API_KEY = os.environ["QDRANT_API_KEY"]
COLLECTION = "well_vectors"  # Qdrant collection searched for physics context

# Primary LLM service (OpenAI-compatible chat-completions endpoint).
# SERVICE_API_KEY is optional: when unset, call_service_api raises and the
# query handler falls back to the local model.
SERVICE_API_KEY = os.environ.get("SERVICE_API_KEY")
SERVICE_API_URL = "https://api.groq.com/openai/v1/chat/completions"
SERVICE_MODEL = "llama-3.3-70b-versatile"

# Shared secret checked against the X-Edyx-Token header by verify_token;
# when unset, the token check is skipped entirely.
EDYX_ACCESS_TOKEN = os.environ.get("EDYX_ACCESS_TOKEN")
21
 
 
22
  PHYSICS_SYSTEM_PROMPT = """You are an expert physics researcher and teacher.
23
  You are given retrieved scientific material from a physics knowledge base.
24
  Your job:
 
29
  - Produce a clean, coherent, human-readable explanation
30
  Style: Clear, structured, graduate-level physics understanding."""
31
 
 
32
  local_llm = None
33
 
34
  def get_local_llm():
 
43
  )
44
  return local_llm
45
 
 
46
  embedder = TextEmbedding(
47
  model_name="BAAI/bge-large-en-v1.5",
48
  )
 
52
  top_k: Optional[int] = 5
53
  max_tokens: Optional[int] = 512
54
 
55
async def verify_token(x_edyx_token: Optional[str] = Header(None)):
    """FastAPI dependency that validates the ``X-Edyx-Token`` request header.

    Fix: the parameter was annotated ``str`` while defaulting to ``None``;
    ``Optional[str]`` matches the actual contract (header may be absent).

    Raises:
        HTTPException: 403 when EDYX_ACCESS_TOKEN is configured and the
            supplied header value does not match it.

    Returns:
        The raw header value (possibly None) for any caller that needs it.

    NOTE(review): when EDYX_ACCESS_TOKEN is unset the check is skipped
    entirely (fail-open) — confirm this is the intended deployment behavior.
    """
    if EDYX_ACCESS_TOKEN and x_edyx_token != EDYX_ACCESS_TOKEN:
        raise HTTPException(status_code=403, detail="Unauthorized: Invalid Access Token")
    return x_edyx_token
59
+
60
@app.get("/")
def root():
    """Liveness endpoint: report that the service is up and which mode it runs in."""
    status_payload = {"status": "edyx-phy running", "mode": "accelerated-primary"}
    return status_payload
63
 
64
  def search_qdrant(question: str, top_k: int):
 
65
  vector = [float(x) for x in next(embedder.embed(question))]
66
 
67
  r = requests.post(
 
94
  context = "\n\n".join(collected)[:12000]
95
  return context, len(hits)
96
 
97
+ async def call_service_api(question: str, context: str, max_tokens: int):
98
+ if not SERVICE_API_KEY:
99
+ raise Exception("Service API key not configured")
 
100
 
101
  user_prompt = f"""CONTEXT (retrieved evidence):
102
  {context}
 
103
  QUESTION:
104
  {question}
 
105
  Now produce a high-quality physics explanation that a serious learner would trust."""
106
 
107
  async with httpx.AsyncClient(timeout=60.0) as client:
108
  response = await client.post(
109
+ SERVICE_API_URL,
110
  headers={
111
  "Content-Type": "application/json",
112
+ "Authorization": f"Bearer {SERVICE_API_KEY}"
113
  },
114
  json={
115
+ "model": SERVICE_MODEL,
116
  "messages": [
117
  {"role": "system", "content": PHYSICS_SYSTEM_PROMPT},
118
  {"role": "user", "content": user_prompt}
 
123
  )
124
 
125
  if response.status_code != 200:
126
+ raise Exception(f"Service API error: {response.status_code} - {response.text}")
127
 
128
  data = response.json()
129
  return data["choices"][0]["message"]["content"]
130
 
131
  def call_local_model(question: str, context: str, max_tokens: int):
 
132
  llm = get_local_llm()
133
 
134
  prompt = f"""
 
142
  Your job:
143
  - Use the retrieved material as grounding evidence
144
  - Ignore irrelevant technical artifacts (paths, array shapes, file names)
145
+ - If the retrieved information is incomplete, use your physics knowledge to complete the explanation
146
  - Do NOT invent specific papers, experiments, or citations
147
  - Do NOT mention datasets, storage paths, or indexing systems
148
  - Produce a clean, coherent, human-readable explanation
 
169
 
170
  return out["choices"][0]["text"].strip()
171
 
172
+ @app.post("/v1/query", dependencies=[Security(verify_token)])
173
  async def query(req: QueryRequest):
174
 
175
  context, sources = search_qdrant(req.question, req.top_k)
 
181
  return {"answer": "No relevant scientific data found.", "sources_used": 0}
182
 
183
  try:
184
+ answer = await call_service_api(req.question, context, req.max_tokens)
185
  return {
186
  "answer": answer,
187
  "sources_used": sources,
188
  "source": "primary"
189
  }
190
  except Exception as e:
191
+ print(f"Service API failed: {e}, falling back to local model...")
192
 
193
 
194
  try: