File size: 6,542 Bytes
529e4dd 5e4062e 306e869 b348ed1 529e4dd 6a1f3e2 529e4dd 5e4062e 529e4dd 5e4062e 529e4dd b348ed1 6a1f3e2 306e869 6a1f3e2 529e4dd 6a1f3e2 529e4dd b348ed1 529e4dd b348ed1 529e4dd 306e869 6a1f3e2 306e869 6a1f3e2 306e869 6a1f3e2 306e869 b47ad05 6a1f3e2 b47ad05 6a1f3e2 b47ad05 6a1f3e2 b47ad05 306e869 b348ed1 306e869 b348ed1 306e869 b348ed1 306e869 b348ed1 306e869 b348ed1 306e869 6a1f3e2 6c5e2ed b348ed1 6c5e2ed 6a1f3e2 b47ad05 306e869 6c5e2ed 6a1f3e2 306e869 6a1f3e2 b47ad05 6a1f3e2 306e869 6a1f3e2 b348ed1 306e869 b348ed1 306e869 b348ed1 306e869 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 |
import asyncio
import os
import secrets
from typing import Optional

import httpx
import requests
from fastapi import FastAPI, HTTPException, Security, Header
from fastembed import TextEmbedding
from llama_cpp import Llama
from pydantic import BaseModel
app = FastAPI()
# Qdrant vector-store connection; both env vars are required (KeyError at startup if missing).
QDRANT_URL = os.environ["QDRANT_URL"].rstrip("/")
QDRANT_API_KEY = os.environ["QDRANT_API_KEY"]
COLLECTION = "well_vectors"
# Hosted LLM (Groq, OpenAI-compatible endpoint) used as the primary answerer.
# SERVICE_API_KEY is optional; when absent, call_service_api raises and the local fallback is used.
SERVICE_API_KEY = os.environ.get("SERVICE_API_KEY")
SERVICE_API_URL = "https://api.groq.com/openai/v1/chat/completions"
SERVICE_MODEL = "llama-3.3-70b-versatile"
# Shared secret for request auth; when unset, verify_token accepts all requests.
EDYX_ACCESS_TOKEN = os.environ.get("EDYX_ACCESS_TOKEN")
# System prompt sent to the hosted model (see call_service_api).
PHYSICS_SYSTEM_PROMPT = """You are an expert physics researcher and teacher.
You are given retrieved scientific material from a physics knowledge base.
Your job:
- Use the retrieved material as grounding evidence
- Ignore irrelevant technical artifacts (paths, array shapes, file names)
- If information is incomplete, use your physics knowledge to complete the explanation
- Do NOT invent specific papers, experiments, or citations
- Produce a clean, coherent, human-readable explanation
Style: Clear, structured, graduate-level physics understanding."""
# Lazily-initialized llama.cpp handle; populated on first get_local_llm() call.
local_llm = None
def get_local_llm():
    """Return the local llama.cpp fallback model, loading it lazily on first use.

    The loaded model is cached in the module-level ``local_llm`` so the
    expensive GGUF load happens at most once per process.
    """
    global local_llm
    if local_llm is not None:
        return local_llm
    print("Loading local fallback model...")
    local_llm = Llama(
        model_path="/app/model.gguf",
        n_ctx=4096,
        n_threads=2,
        n_batch=128,
    )
    return local_llm
# Embedding model used to vectorize queries for Qdrant search; constructed
# eagerly at import time. NOTE(review): dimensionality must match the
# COLLECTION's configured vector size — confirm against the Qdrant schema.
embedder = TextEmbedding(
    model_name="BAAI/bge-large-en-v1.5",
)
class QueryRequest(BaseModel):
    """Request body for POST /v1/query."""
    # NOTE(review): top_k/max_tokens are Optional, so a client may send an
    # explicit null, which is passed downstream as None — verify the
    # endpoint guards against that before use.
    question: str
    top_k: Optional[int] = 5
    max_tokens: Optional[int] = 512
async def verify_token(x_edyx_token: str = Header(None)):
    """FastAPI dependency enforcing the shared-secret access token.

    Reads the ``x-edyx-token`` header and compares it to EDYX_ACCESS_TOKEN.
    When EDYX_ACCESS_TOKEN is unset, authentication is disabled and every
    request passes. Raises HTTPException(403) on a missing or wrong token.
    """
    if EDYX_ACCESS_TOKEN:
        # secrets.compare_digest gives a constant-time comparison, avoiding
        # the timing side-channel of a plain `!=` on the secret token.
        if not x_edyx_token or not secrets.compare_digest(x_edyx_token, EDYX_ACCESS_TOKEN):
            raise HTTPException(status_code=403, detail="Unauthorized: Invalid Access Token")
    return x_edyx_token
@app.get("/")
def root():
    """Liveness endpoint reporting service status and operating mode."""
    status_payload = {
        "status": "edyx-phy running",
        "mode": "accelerated-primary",
    }
    return status_payload
def search_qdrant(question: str, top_k: int):
    """Embed the question and fetch the top_k nearest payloads from Qdrant.

    Returns (context_text, hit_count) on success, or (None, error_message)
    when the HTTP search call does not return 200.
    """
    query_vector = [float(component) for component in next(embedder.embed(question))]
    response = requests.post(
        f"{QDRANT_URL}/collections/{COLLECTION}/points/search",
        headers={
            "Content-Type": "application/json",
            "api-key": QDRANT_API_KEY,
        },
        json={
            "vector": query_vector,
            "limit": top_k,
            "with_payload": True,
        },
        timeout=30,
    )
    if response.status_code != 200:
        return None, f"Qdrant search failed: {response.text}"
    hits = response.json().get("result", [])
    fragments = []
    for hit in hits:
        payload = hit.get("payload", {})
        # A point may carry either or both text fields; collect both,
        # "content" first, matching the stored payload conventions.
        for field in ("content", "text"):
            if field in payload:
                fragments.append(str(payload[field]))
    # Cap the concatenated context so the downstream prompt stays bounded.
    return "\n\n".join(fragments)[:12000], len(hits)
async def call_service_api(question: str, context: str, max_tokens: int):
    """Answer the question via the hosted chat-completions API.

    Grounds the model with the retrieved context via the shared physics
    system prompt. Raises Exception when no API key is configured or the
    service responds with a non-200 status.
    """
    if not SERVICE_API_KEY:
        raise Exception("Service API key not configured")
    user_prompt = f"""CONTEXT (retrieved evidence):
{context}
QUESTION:
{question}
Now produce a high-quality physics explanation that a serious learner would trust."""
    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {SERVICE_API_KEY}",
    }
    request_body = {
        "model": SERVICE_MODEL,
        "messages": [
            {"role": "system", "content": PHYSICS_SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt},
        ],
        "max_tokens": max_tokens,
        "temperature": 0.2,
    }
    async with httpx.AsyncClient(timeout=60.0) as client:
        response = await client.post(
            SERVICE_API_URL,
            headers=request_headers,
            json=request_body,
        )
    if response.status_code != 200:
        raise Exception(f"Service API error: {response.status_code} - {response.text}")
    return response.json()["choices"][0]["message"]["content"]
def call_local_model(question: str, context: str, max_tokens: int):
    """Generate an answer with the local llama.cpp fallback model.

    Builds a grounding prompt from the retrieved context and runs a single
    completion; returns the stripped generated text.
    """
    model = get_local_llm()
    grounded_prompt = f"""
You are an expert physics researcher and teacher.
You are given raw, fragmented scientific material retrieved from a large physics knowledge base.
This material may include:
- incomplete sentences
- dataset paths or filenames
- low-level implementation details
- broken or partial explanations
Your job:
- Use the retrieved material as grounding evidence
- Ignore irrelevant technical artifacts (paths, array shapes, file names)
- If the retrieved information is incomplete, use your physics knowledge to complete the explanation
- Do NOT invent specific papers, experiments, or citations
- Do NOT mention datasets, storage paths, or indexing systems
- Produce a clean, coherent, human-readable explanation
Style rules:
- Clear, structured explanation
- Intuitive where possible
- Graduate-level physics understanding
- Text-first (formulas only if they genuinely help)
- No raw fragments, no broken sentences
CONTEXT (retrieved evidence):
{context}
QUESTION:
{question}
Now produce a high-quality physics explanation that a serious learner would trust.
"""
    completion = model(
        grounded_prompt,
        max_tokens=max_tokens,
        temperature=0.2,
        top_p=0.9,
        stop=["SOURCE:", "QUESTION:"],
    )
    first_choice = completion["choices"][0]
    return first_choice["text"].strip()
@app.post("/v1/query", dependencies=[Security(verify_token)])
async def query(req: QueryRequest):
    """RAG query endpoint.

    Retrieves grounding context from Qdrant, answers with the hosted
    service model, and falls back to the local llama.cpp model if the
    service call fails. Failures are reported in the response body
    (status 200) rather than as HTTP error codes.
    """
    # top_k/max_tokens are Optional, so a client may send explicit nulls;
    # re-apply the defaults rather than passing None downstream.
    top_k = req.top_k if req.top_k is not None else 5
    max_tokens = req.max_tokens if req.max_tokens is not None else 512
    # search_qdrant uses blocking `requests`; keep it off the event loop.
    context, sources = await asyncio.to_thread(search_qdrant, req.question, top_k)
    if context is None:
        # On failure, the second return slot carries the error message.
        return {"error": "Qdrant search failed", "details": sources}
    if not context:
        return {"answer": "No relevant scientific data found.", "sources_used": 0}
    try:
        answer = await call_service_api(req.question, context, max_tokens)
        return {
            "answer": answer,
            "sources_used": sources,
            "source": "primary"
        }
    except Exception as primary_err:
        print(f"Service API failed: {primary_err}, falling back to local model...")
        try:
            # llama.cpp inference is CPU-bound and blocking; off-load it too.
            answer = await asyncio.to_thread(
                call_local_model, req.question, context, max_tokens
            )
            return {
                "answer": answer,
                "sources_used": sources,
                "source": "fallback"
            }
        except Exception as fallback_err:
            return {
                "answer": f"Error: Both primary and fallback failed. {str(fallback_err)}",
                "sources_used": 0,
                "source": "error"
            }