|
|
import asyncio
import os
import secrets
from typing import Optional

import httpx
import requests
from fastapi import FastAPI, HTTPException, Security, Header
from fastembed import TextEmbedding
from llama_cpp import Llama
from pydantic import BaseModel
|
|
|
|
|
app = FastAPI()

# --- Qdrant vector store (required: missing env vars fail fast at import). ---
QDRANT_URL = os.environ["QDRANT_URL"].rstrip("/")
QDRANT_API_KEY = os.environ["QDRANT_API_KEY"]
COLLECTION = "well_vectors"

# --- Primary hosted LLM (Groq, OpenAI-compatible chat completions). ---
# SERVICE_API_KEY is optional: when unset, call_service_api raises and the
# endpoint falls back to the local model.
SERVICE_API_KEY = os.environ.get("SERVICE_API_KEY")
SERVICE_API_URL = "https://api.groq.com/openai/v1/chat/completions"
SERVICE_MODEL = "llama-3.3-70b-versatile"

# Shared access token checked by verify_token. Optional: when unset,
# authentication is disabled and every request is accepted.
EDYX_ACCESS_TOKEN = os.environ.get("EDYX_ACCESS_TOKEN")

# System prompt sent to the primary (chat-style) model. The blank lines are
# part of the prompt text sent over the wire; do not reformat.
PHYSICS_SYSTEM_PROMPT = """You are an expert physics researcher and teacher.


You are given retrieved scientific material from a physics knowledge base.


Your job:


- Use the retrieved material as grounding evidence


- Ignore irrelevant technical artifacts (paths, array shapes, file names)


- If information is incomplete, use your physics knowledge to complete the explanation


- Do NOT invent specific papers, experiments, or citations


- Produce a clean, coherent, human-readable explanation


Style: Clear, structured, graduate-level physics understanding."""

# Lazily-initialized llama.cpp model; populated on first use by get_local_llm().
local_llm = None
|
|
|
|
|
def get_local_llm():
    """Return the process-wide llama.cpp fallback model, loading it on first use.

    The model is cached in the module-level ``local_llm`` so the expensive
    load from disk happens at most once per process.
    """
    global local_llm
    if local_llm is not None:
        return local_llm
    print("Loading local fallback model...")
    local_llm = Llama(
        model_path="/app/model.gguf",
        n_ctx=4096,
        n_threads=2,
        n_batch=128,
    )
    return local_llm
|
|
|
|
|
# Text embedder used to vectorize incoming questions for the Qdrant search.
# NOTE(review): presumably this is the same embedding model used to build the
# "well_vectors" collection — confirm against the indexing pipeline, since a
# mismatch would silently degrade retrieval quality.
embedder = TextEmbedding(
    model_name="BAAI/bge-large-en-v1.5",
)
|
|
|
|
|
class QueryRequest(BaseModel):
    """Request body for POST /v1/query."""

    # The physics question to answer.
    question: str
    # Number of nearest-neighbor hits to retrieve from Qdrant.
    top_k: Optional[int] = 5
    # Generation budget forwarded to the LLM (primary or fallback).
    max_tokens: Optional[int] = 512
|
|
|
|
|
async def verify_token(x_edyx_token: str = Header(None)):
    """FastAPI security dependency validating the ``X-Edyx-Token`` header.

    When ``EDYX_ACCESS_TOKEN`` is unset, authentication is disabled and every
    request passes (open/dev mode — same as the original behavior). When set,
    the header must match exactly; the comparison uses
    ``secrets.compare_digest`` so a secret-vs-attacker-input check is
    constant-time rather than a short-circuiting ``!=``.

    Raises:
        HTTPException: 403 when a token is configured and the header is
            missing or does not match.
    """
    if EDYX_ACCESS_TOKEN:
        # compare_digest requires two str/bytes; a missing header (None)
        # becomes "" and fails closed, matching the original `!=` behavior.
        supplied = x_edyx_token or ""
        if not secrets.compare_digest(supplied, EDYX_ACCESS_TOKEN):
            raise HTTPException(status_code=403, detail="Unauthorized: Invalid Access Token")
    return x_edyx_token
|
|
|
|
|
@app.get("/") |
|
|
def root(): |
|
|
return {"status": "edyx-phy running", "mode": "accelerated-primary"} |
|
|
|
|
|
def search_qdrant(question: str, top_k: int):
    """Embed *question* and run a vector similarity search against Qdrant.

    Args:
        question: Natural-language query to embed and search for.
        top_k: Maximum number of points to retrieve.

    Returns:
        ``(context, n_hits)`` on success, where ``context`` is the
        concatenated payload text (capped at 12 000 chars, possibly empty)
        and ``n_hits`` the number of hits. ``(None, error_message)`` on any
        transport or API failure — callers distinguish errors by
        ``context is None``.
    """
    # fastembed yields one vector per input document; take the single result.
    vector = [float(x) for x in next(embedder.embed(question))]

    try:
        r = requests.post(
            f"{QDRANT_URL}/collections/{COLLECTION}/points/search",
            headers={
                "Content-Type": "application/json",
                "api-key": QDRANT_API_KEY,
            },
            json={
                "vector": vector,
                "limit": top_k,
                "with_payload": True,
            },
            timeout=30,
        )
    except requests.RequestException as e:
        # Previously a timeout/connection error escaped as an unhandled 500;
        # route it through the same (None, message) channel as HTTP failures.
        return None, f"Qdrant search failed: {e}"

    if r.status_code != 200:
        return None, f"Qdrant search failed: {r.text}"

    try:
        hits = r.json().get("result", [])
    except ValueError as e:
        # Non-JSON body (proxy error page, truncated response, ...).
        return None, f"Qdrant search failed: invalid JSON response ({e})"

    collected = []
    for h in hits:
        payload = h.get("payload", {})
        # A point may carry its text under either key; keep both when present.
        if "content" in payload:
            collected.append(str(payload["content"]))
        if "text" in payload:
            collected.append(str(payload["text"]))

    # Cap the combined context so the downstream prompt stays within model
    # context limits.
    context = "\n\n".join(collected)[:12000]
    return context, len(hits)
|
|
|
|
|
async def call_service_api(question: str, context: str, max_tokens: int):
    """Ask the hosted Groq chat-completions endpoint to answer *question*.

    Args:
        question: The user's physics question.
        context: Retrieved evidence text used to ground the answer.
        max_tokens: Generation budget for the completion.

    Raises:
        Exception: when no API key is configured, or the endpoint returns a
            non-200 status.

    Returns:
        The assistant message content of the first completion choice.
    """
    if not SERVICE_API_KEY:
        raise Exception("Service API key not configured")

    # The blank lines inside this prompt are sent to the model verbatim.
    user_prompt = f"""CONTEXT (retrieved evidence):


{context}


QUESTION:


{question}


Now produce a high-quality physics explanation that a serious learner would trust."""

    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {SERVICE_API_KEY}",
    }
    request_body = {
        "model": SERVICE_MODEL,
        "messages": [
            {"role": "system", "content": PHYSICS_SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt},
        ],
        "max_tokens": max_tokens,
        "temperature": 0.2,
    }

    async with httpx.AsyncClient(timeout=60.0) as client:
        response = await client.post(
            SERVICE_API_URL,
            headers=request_headers,
            json=request_body,
        )

    if response.status_code != 200:
        raise Exception(f"Service API error: {response.status_code} - {response.text}")

    data = response.json()
    return data["choices"][0]["message"]["content"]
|
|
|
|
|
def call_local_model(question: str, context: str, max_tokens: int):
    """Generate an answer with the local llama.cpp fallback model.

    Builds one self-contained prompt (instructions + retrieved context +
    question) and runs blocking, CPU-bound inference on the lazily-loaded
    model. Used when the hosted service call fails.

    Args:
        question: The user's physics question.
        context: Retrieved evidence text used to ground the answer.
        max_tokens: Generation budget passed to the model.

    Returns:
        The generated answer text, stripped of surrounding whitespace.
    """
    llm = get_local_llm()

    # NOTE: the prompt below (including its blank lines) is fed to the model
    # verbatim; do not reformat it.
    prompt = f"""


You are an expert physics researcher and teacher.


You are given raw, fragmented scientific material retrieved from a large physics knowledge base.


This material may include:


- incomplete sentences


- dataset paths or filenames


- low-level implementation details


- broken or partial explanations


Your job:


- Use the retrieved material as grounding evidence


- Ignore irrelevant technical artifacts (paths, array shapes, file names)


- If the retrieved information is incomplete, use your physics knowledge to complete the explanation


- Do NOT invent specific papers, experiments, or citations


- Do NOT mention datasets, storage paths, or indexing systems


- Produce a clean, coherent, human-readable explanation


Style rules:


- Clear, structured explanation


- Intuitive where possible


- Graduate-level physics understanding


- Text-first (formulas only if they genuinely help)


- No raw fragments, no broken sentences


CONTEXT (retrieved evidence):


{context}


QUESTION:


{question}


Now produce a high-quality physics explanation that a serious learner would trust.


"""

    out = llm(
        prompt,
        max_tokens=max_tokens,
        temperature=0.2,
        top_p=0.9,
        # Stop sequences keep the model from continuing the template.
        # NOTE(review): "SOURCE:" never appears in the prompt above (it uses
        # "CONTEXT"), so that stop token may be vestigial — confirm intent.
        stop=["SOURCE:", "QUESTION:"],
    )

    return out["choices"][0]["text"].strip()
|
|
|
|
|
@app.post("/v1/query", dependencies=[Security(verify_token)]) |
|
|
async def query(req: QueryRequest): |
|
|
|
|
|
context, sources = search_qdrant(req.question, req.top_k) |
|
|
|
|
|
if context is None: |
|
|
return {"error": "Qdrant search failed", "details": sources} |
|
|
|
|
|
if not context: |
|
|
return {"answer": "No relevant scientific data found.", "sources_used": 0} |
|
|
|
|
|
try: |
|
|
answer = await call_service_api(req.question, context, req.max_tokens) |
|
|
return { |
|
|
"answer": answer, |
|
|
"sources_used": sources, |
|
|
"source": "primary" |
|
|
} |
|
|
except Exception as e: |
|
|
print(f"Service API failed: {e}, falling back to local model...") |
|
|
|
|
|
|
|
|
try: |
|
|
answer = call_local_model(req.question, context, req.max_tokens) |
|
|
return { |
|
|
"answer": answer, |
|
|
"sources_used": sources, |
|
|
"source": "fallback" |
|
|
} |
|
|
except Exception as e: |
|
|
return { |
|
|
"answer": f"Error: Both primary and fallback failed. {str(e)}", |
|
|
"sources_used": 0, |
|
|
"source": "error" |
|
|
} |