Update app.py
app.py CHANGED
@@ -1,5 +1,6 @@
 import os
 import requests
+import httpx
 from fastapi import FastAPI
 from pydantic import BaseModel
 from typing import Optional
@@ -8,17 +9,43 @@ from fastembed import TextEmbedding
 
 app = FastAPI()
 
+# Qdrant Configuration (unchanged)
 QDRANT_URL = os.environ["QDRANT_URL"].rstrip("/")
 QDRANT_API_KEY = os.environ["QDRANT_API_KEY"]
 COLLECTION = "well_vectors"
 
-llm = Llama(
-    model_path="/app/model.gguf",
-    n_ctx=4096,
-    n_threads=2,
-    n_batch=128,
-)
+# Groq API Configuration
+GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
+GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
+GROQ_MODEL = "llama-3.3-70b-versatile"  # Best for scientific reasoning
+
+# Physics system prompt for Groq
+PHYSICS_SYSTEM_PROMPT = """You are an expert physics researcher and teacher.
+You are given retrieved scientific material from a physics knowledge base.
+Your job:
+- Use the retrieved material as grounding evidence
+- Ignore irrelevant technical artifacts (paths, array shapes, file names)
+- If information is incomplete, use your physics knowledge to complete the explanation
+- Do NOT invent specific papers, experiments, or citations
+- Produce a clean, coherent, human-readable explanation
+Style: Clear, structured, graduate-level physics understanding."""
+
+# Local fallback model (only loaded when needed)
+local_llm = None
+
+def get_local_llm():
+    global local_llm
+    if local_llm is None:
+        print("Loading local fallback model...")
+        local_llm = Llama(
+            model_path="/app/model.gguf",
+            n_ctx=4096,
+            n_threads=2,
+            n_batch=128,
+        )
+    return local_llm
 
+# Embedder (always needed for RAG search)
 embedder = TextEmbedding(
     model_name="BAAI/bge-large-en-v1.5",
 )
@@ -30,11 +57,11 @@ class QueryRequest(BaseModel):
 
 @app.get("/")
 def root():
-    return {"status": "edyx-phy running"}
+    return {"status": "edyx-phy running", "mode": "groq-primary"}
 
-@app.post("/v1/query")
-def query(req: QueryRequest):
-    vector = [float(x) for x in next(embedder.embed(req.question))]
+def search_qdrant(question: str, top_k: int):
+    """Search Qdrant for relevant physics context"""
+    vector = [float(x) for x in next(embedder.embed(question))]
 
     r = requests.post(
         f"{QDRANT_URL}/collections/{COLLECTION}/points/search",
@@ -44,20 +71,17 @@ def query(req: QueryRequest):
         },
         json={
             "vector": vector,
-            "limit": req.top_k,
+            "limit": top_k,
             "with_payload": True,
         },
         timeout=30,
     )
 
     if r.status_code != 200:
-        return
+        return None, f"Qdrant search failed: {r.text}"
 
     hits = r.json().get("result", [])
-
-    if not hits:
-        return {"answer": "No relevant scientific data found."}
-
+
     collected = []
     for h in hits:
         payload = h.get("payload", {})
@@ -67,17 +91,57 @@ def query(req: QueryRequest):
             collected.append(str(payload["text"]))
 
     context = "\n\n".join(collected)[:12000]
-
+    return context, len(hits)
+
+async def call_groq_api(question: str, context: str, max_tokens: int):
+    """Try to get response from Groq API"""
+    if not GROQ_API_KEY:
+        raise Exception("GROQ_API_KEY not configured")
+
+    user_prompt = f"""CONTEXT (retrieved evidence):
+{context}
+
+QUESTION:
+{question}
+
+Now produce a high-quality physics explanation that a serious learner would trust."""
+
+    async with httpx.AsyncClient(timeout=60.0) as client:
+        response = await client.post(
+            GROQ_API_URL,
+            headers={
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {GROQ_API_KEY}"
+            },
+            json={
+                "model": GROQ_MODEL,
+                "messages": [
+                    {"role": "system", "content": PHYSICS_SYSTEM_PROMPT},
+                    {"role": "user", "content": user_prompt}
+                ],
+                "max_tokens": max_tokens,
+                "temperature": 0.2
+            }
+        )
+
+        if response.status_code != 200:
+            raise Exception(f"Groq API error: {response.status_code} - {response.text}")
+
+        data = response.json()
+        return data["choices"][0]["message"]["content"]
+
+def call_local_model(question: str, context: str, max_tokens: int):
+    """Fallback to local llama model - YOUR ORIGINAL LOGIC"""
+    llm = get_local_llm()
+
     prompt = f"""
 You are an expert physics researcher and teacher.
-
 You are given raw, fragmented scientific material retrieved from a large physics knowledge base.
 This material may include:
 - incomplete sentences
 - dataset paths or filenames
 - low-level implementation details
 - broken or partial explanations
-
 Your job:
 - Use the retrieved material as grounding evidence
 - Ignore irrelevant technical artifacts (paths, array shapes, file names)
@@ -85,33 +149,61 @@ Your job:
 - Do NOT invent specific papers, experiments, or citations
 - Do NOT mention datasets, storage paths, or indexing systems
 - Produce a clean, coherent, human-readable explanation
-
 Style rules:
 - Clear, structured explanation
 - Intuitive where possible
 - Graduate-level physics understanding
 - Text-first (formulas only if they genuinely help)
 - No raw fragments, no broken sentences
-
 CONTEXT (retrieved evidence):
 {context}
-
 QUESTION:
-{req.question}
-
+{question}
 Now produce a high-quality physics explanation that a serious learner would trust.
 """
 
-
     out = llm(
         prompt,
-        max_tokens=req.max_tokens,
+        max_tokens=max_tokens,
         temperature=0.2,
         top_p=0.9,
         stop=["SOURCE:", "QUESTION:"],
     )
+
+    return out["choices"][0]["text"].strip()
 
-
-
-
-
+@app.post("/v1/query")
+async def query(req: QueryRequest):
+
+    context, sources = search_qdrant(req.question, req.top_k)
+
+    if context is None:
+        return {"error": "Qdrant search failed", "details": sources}
+
+    if not context:
+        return {"answer": "No relevant scientific data found.", "sources_used": 0}
+
+    try:
+        answer = await call_groq_api(req.question, context, req.max_tokens)
+        return {
+            "answer": answer,
+            "sources_used": sources,
+            "source": "primary"
+        }
+    except Exception as e:
+        print(f"Groq API failed: {e}, falling back to local model...")
+
+        try:
+            answer = call_local_model(req.question, context, req.max_tokens)
+            return {
+                "answer": answer,
+                "sources_used": sources,
+                "source": "fallback"
+            }
+        except Exception as e:
+            return {
+                "answer": f"Error: Both primary and fallback failed. {str(e)}",
+                "sources_used": 0,
+                "source": "error"
+            }
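For reference, a minimal client-side sketch of how the reworked /v1/query endpoint can be exercised once the app is running. The request fields mirror what the handler reads from QueryRequest (question, top_k, max_tokens); the base URL, port, and example values are assumptions for illustration, not part of this commit. The "source" field in the response indicates whether Groq ("primary") or the local GGUF model ("fallback") produced the answer.

# Hypothetical client sketch for the /v1/query endpoint (not part of the commit).
# Assumes the FastAPI app is reachable at BASE_URL and that QueryRequest exposes
# the fields the handler uses: question, top_k, max_tokens.
import requests

BASE_URL = "http://localhost:7860"  # placeholder host/port for a local run

resp = requests.post(
    f"{BASE_URL}/v1/query",
    json={
        "question": "Why does the speed of sound in a gas depend on temperature?",
        "top_k": 5,
        "max_tokens": 512,
    },
    timeout=120,
)
data = resp.json()
print(data.get("source"))   # "primary" (Groq), "fallback" (local model), or "error"
print(data.get("answer"))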