File size: 6,542 Bytes
529e4dd
5e4062e
306e869
b348ed1
529e4dd
6a1f3e2
 
529e4dd
 
 
 
5e4062e
529e4dd
5e4062e
529e4dd
b348ed1
 
 
 
 
6a1f3e2
306e869
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a1f3e2
 
 
529e4dd
 
 
6a1f3e2
 
529e4dd
b348ed1
 
 
 
 
529e4dd
 
b348ed1
529e4dd
306e869
 
6a1f3e2
 
 
 
 
 
 
 
 
306e869
6a1f3e2
 
 
 
 
 
306e869
6a1f3e2
 
306e869
b47ad05
6a1f3e2
 
 
b47ad05
6a1f3e2
b47ad05
6a1f3e2
b47ad05
306e869
 
b348ed1
 
 
306e869
 
 
 
 
 
 
 
 
b348ed1
306e869
 
b348ed1
306e869
 
b348ed1
306e869
 
 
 
 
 
 
 
 
 
b348ed1
306e869
 
 
 
 
 
 
6a1f3e2
6c5e2ed
 
 
 
 
 
 
 
 
 
b348ed1
6c5e2ed
 
 
 
 
 
 
 
 
 
6a1f3e2
b47ad05
306e869
6c5e2ed
6a1f3e2
 
 
 
306e869
6a1f3e2
 
b47ad05
6a1f3e2
306e869
 
6a1f3e2
b348ed1
306e869
 
 
 
 
 
 
 
 
 
 
b348ed1
306e869
 
 
 
 
 
b348ed1
306e869
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
import os
import secrets
from typing import Optional

import httpx
import requests
from fastapi import FastAPI, Header, HTTPException, Security
from fastembed import TextEmbedding
from llama_cpp import Llama
from pydantic import BaseModel

app = FastAPI()

# Qdrant vector-store connection settings. Both env vars are required:
# a missing one raises KeyError at import time, failing fast on bad config.
QDRANT_URL = os.environ["QDRANT_URL"].rstrip("/")
QDRANT_API_KEY = os.environ["QDRANT_API_KEY"]
COLLECTION = "well_vectors"

# Primary LLM backend (OpenAI-compatible chat-completions endpoint on Groq).
# SERVICE_API_KEY is optional; when unset, call_service_api raises and the
# endpoint falls back to the local model.
SERVICE_API_KEY = os.environ.get("SERVICE_API_KEY")
SERVICE_API_URL = "https://api.groq.com/openai/v1/chat/completions"
SERVICE_MODEL = "llama-3.3-70b-versatile"

# Optional shared-secret token for /v1/query; when unset, verify_token
# performs no check and the endpoint is open.
EDYX_ACCESS_TOKEN = os.environ.get("EDYX_ACCESS_TOKEN")

# System prompt used for the primary (hosted) model path.
PHYSICS_SYSTEM_PROMPT = """You are an expert physics researcher and teacher.
You are given retrieved scientific material from a physics knowledge base.
Your job:
- Use the retrieved material as grounding evidence
- Ignore irrelevant technical artifacts (paths, array shapes, file names)
- If information is incomplete, use your physics knowledge to complete the explanation
- Do NOT invent specific papers, experiments, or citations
- Produce a clean, coherent, human-readable explanation
Style: Clear, structured, graduate-level physics understanding."""

# Lazily-initialized llama.cpp model handle; populated by get_local_llm().
local_llm = None

def get_local_llm():
    """Return the llama.cpp fallback model, loading it on first use.

    The handle is cached in the module-level ``local_llm`` so the GGUF
    file is read from disk at most once per process.
    """
    global local_llm
    if local_llm is not None:
        return local_llm
    print("Loading local fallback model...")
    local_llm = Llama(
        model_path="/app/model.gguf",
        n_ctx=4096,
        n_threads=2,
        n_batch=128,
    )
    return local_llm

# Sentence embedder used to vectorize incoming questions for Qdrant search.
# NOTE(review): presumably downloads/loads model weights at import time —
# confirm startup cost is acceptable for this deployment.
embedder = TextEmbedding(
    model_name="BAAI/bge-large-en-v1.5",
)

class QueryRequest(BaseModel):
    """Request body for POST /v1/query."""

    question: str
    top_k: Optional[int] = 5  # number of Qdrant hits to retrieve
    max_tokens: Optional[int] = 512  # generation budget for the answer

async def verify_token(x_edyx_token: str = Header(None)):
    """FastAPI dependency enforcing the shared-secret access token.

    When EDYX_ACCESS_TOKEN is unset the check is skipped entirely (the
    endpoint is open, matching the original behavior). Otherwise the
    X-Edyx-Token header must match; the comparison is constant-time
    (secrets.compare_digest) so the token cannot be recovered via
    response-timing differences.

    Raises:
        HTTPException: 403 when a token is configured and the header is
            missing or does not match.
    """
    if EDYX_ACCESS_TOKEN:
        # A missing header arrives as None; compare against "" so the
        # constant-time check still runs (and fails) uniformly.
        supplied = x_edyx_token or ""
        if not secrets.compare_digest(supplied, EDYX_ACCESS_TOKEN):
            raise HTTPException(status_code=403, detail="Unauthorized: Invalid Access Token")
    return x_edyx_token

@app.get("/")
def root():
    """Health-check endpoint reporting service status and operating mode."""
    status_payload = {"status": "edyx-phy running", "mode": "accelerated-primary"}
    return status_payload

def search_qdrant(question: str, top_k: int):
    """Embed *question* and run a vector similarity search against Qdrant.

    Args:
        question: Natural-language query to embed and search with.
        top_k: Maximum number of hits to request from Qdrant.

    Returns:
        (context, n_hits) on success, where context is up to 12,000 chars
        of concatenated payload text; (None, error_message) on failure.
    """
    vector = [float(x) for x in next(embedder.embed(question))]

    try:
        r = requests.post(
            f"{QDRANT_URL}/collections/{COLLECTION}/points/search",
            headers={
                "Content-Type": "application/json",
                "api-key": QDRANT_API_KEY,
            },
            json={
                "vector": vector,
                "limit": top_k,
                "with_payload": True,
            },
            timeout=30,
        )
    except requests.RequestException as exc:
        # Connection/timeout failures previously propagated as unhandled
        # exceptions (500s); route them through the same (None, error)
        # channel the caller already handles for HTTP-level failures.
        return None, f"Qdrant search failed: {exc}"

    if r.status_code != 200:
        return None, f"Qdrant search failed: {r.text}"

    hits = r.json().get("result", [])

    collected = []
    for h in hits:
        payload = h.get("payload", {})
        # NOTE(review): a hit carrying BOTH keys is appended twice —
        # presumably payloads only ever have one of them; confirm upstream.
        if "content" in payload:
            collected.append(str(payload["content"]))
        if "text" in payload:
            collected.append(str(payload["text"]))

    # Cap the context so downstream prompts stay within model limits.
    context = "\n\n".join(collected)[:12000]
    return context, len(hits)

async def call_service_api(question: str, context: str, max_tokens: int):
    """Generate an answer via the primary hosted chat-completions API.

    Raises a plain Exception on missing configuration or any non-200
    response; the caller treats that as the signal to fall back to the
    local model.
    """
    if not SERVICE_API_KEY:
        raise Exception("Service API key not configured")

    user_prompt = f"""CONTEXT (retrieved evidence):
{context}
QUESTION:
{question}
Now produce a high-quality physics explanation that a serious learner would trust."""

    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {SERVICE_API_KEY}"
    }
    request_body = {
        "model": SERVICE_MODEL,
        "messages": [
            {"role": "system", "content": PHYSICS_SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt}
        ],
        "max_tokens": max_tokens,
        "temperature": 0.2
    }

    async with httpx.AsyncClient(timeout=60.0) as client:
        response = await client.post(
            SERVICE_API_URL,
            headers=request_headers,
            json=request_body,
        )

    # Non-streaming response body is fully read, so it is safe to inspect
    # after the client context has closed.
    if response.status_code != 200:
        raise Exception(f"Service API error: {response.status_code} - {response.text}")

    return response.json()["choices"][0]["message"]["content"]

def call_local_model(question: str, context: str, max_tokens: int):
    """Generate an answer with the on-disk llama.cpp fallback model."""
    prompt = f"""
You are an expert physics researcher and teacher.
You are given raw, fragmented scientific material retrieved from a large physics knowledge base.
This material may include:
- incomplete sentences
- dataset paths or filenames
- low-level implementation details
- broken or partial explanations
Your job:
- Use the retrieved material as grounding evidence
- Ignore irrelevant technical artifacts (paths, array shapes, file names)
- If the retrieved information is incomplete, use your physics knowledge to complete the explanation
- Do NOT invent specific papers, experiments, or citations
- Do NOT mention datasets, storage paths, or indexing systems
- Produce a clean, coherent, human-readable explanation
Style rules:
- Clear, structured explanation
- Intuitive where possible
- Graduate-level physics understanding
- Text-first (formulas only if they genuinely help)
- No raw fragments, no broken sentences
CONTEXT (retrieved evidence):
{context}
QUESTION:
{question}
Now produce a high-quality physics explanation that a serious learner would trust.
"""

    # Stop sequences guard against the model echoing the prompt scaffold.
    completion = get_local_llm()(
        prompt,
        max_tokens=max_tokens,
        temperature=0.2,
        top_p=0.9,
        stop=["SOURCE:", "QUESTION:"],
    )
    return completion["choices"][0]["text"].strip()

@app.post("/v1/query", dependencies=[Security(verify_token)])
async def query(req: QueryRequest):
    """RAG endpoint: retrieve Qdrant context, then answer the question.

    The hosted service is tried first; on ANY failure the local model is
    used as a fallback. All errors are reported inside the JSON body
    (HTTP status stays 200), matching the existing client contract.
    """
    context, sources = search_qdrant(req.question, req.top_k)

    # Retrieval failed outright (sources carries the error message here).
    if context is None:
        return {"error": "Qdrant search failed", "details": sources}

    # Retrieval succeeded but produced no usable text.
    if not context:
        return {"answer": "No relevant scientific data found.", "sources_used": 0}

    try:
        return {
            "answer": await call_service_api(req.question, context, req.max_tokens),
            "sources_used": sources,
            "source": "primary"
        }
    except Exception as e:
        print(f"Service API failed: {e}, falling back to local model...")

    try:
        return {
            "answer": call_local_model(req.question, context, req.max_tokens),
            "sources_used": sources,
            "source": "fallback"
        }
    except Exception as e:
        return {
            "answer": f"Error: Both primary and fallback failed. {str(e)}",
            "sources_used": 0,
            "source": "error"
        }