Create app.py
app.py ADDED
@@ -0,0 +1,108 @@
import gradio as gr
from qdrant_client import QdrantClient, models
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os

# Qdrant connection settings, supplied as environment variables (Space secrets)
QDRANT_URL = os.environ.get("QDRANT_URL")
QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")
COLLECTION_NAME = "well_vectors"

QWEN_MODEL = "Qwen/Qwen2.5-0.5B-Instruct"

# Embedding model used to encode incoming questions
embedder = SentenceTransformer("all-MiniLM-L6-v2")

client = QdrantClient(
    url=QDRANT_URL,
    api_key=QDRANT_API_KEY
)

# Small instruct model used only to reformat the retrieved evidence
tokenizer = AutoTokenizer.from_pretrained(QWEN_MODEL)
model = AutoModelForCausalLM.from_pretrained(
    QWEN_MODEL,
    torch_dtype=torch.float16,
    device_map="auto"
)


def scientific_query_api(question: str):
    # Encode the question and retrieve the single closest "concept" point
    qvec = embedder.encode(question, normalize_embeddings=True).tolist()

    concepts = client.search(
        collection_name=COLLECTION_NAME,
        query_vector=qvec,
        query_filter=models.Filter(
            must=[
                models.FieldCondition(
                    key="type",
                    match=models.MatchValue(value="concept")
                )
            ]
        ),
        with_vectors=True,  # the concept's own vector seeds the evidence search below
        limit=1
    )

    if not concepts:
        return {
            "question": question,
            "answer": "No indexed scientific material is available for this query.",
            "sources": [],
            "confidence": "insufficient_data"
        }

    concept = concepts[0]

    # Retrieve supporting evidence nearest to the matched concept
    evidence = client.search(
        collection_name=COLLECTION_NAME,
        query_vector=concept.vector,
        limit=5
    )

    packet = []
    packet.append("Concept definition:")
    packet.append(concept.payload["content"])
    packet.append("\nScientific context from indexed data:")

    sources = {"Curated physics concepts"}

    for e in evidence:
        if "dataset" in e.payload:
            packet.append(
                f"- Dataset: {e.payload['dataset']}, File: {e.payload.get('file', '')}"
            )
            sources.add(f"The Well: {e.payload['dataset']}")

    evidence_text = "\n".join(packet)

    prompt = f"""
You are a scientific formatter.

Rules:
- Use ONLY the information below.
- Do NOT add facts or interpretations.
- Preserve scientific meaning.

INFORMATION:
{evidence_text}
"""

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=300,
        do_sample=False
    )

    # Decode only the newly generated tokens so the prompt is not echoed in the answer
    answer = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[1]:],
        skip_special_tokens=True
    )

    return {
        "question": question,
        "answer": answer.strip(),
        "sources": sorted(sources),
        "confidence": "grounded"
    }


iface = gr.Interface(
    fn=scientific_query_api,
    inputs=gr.Textbox(label="Scientific Question"),
    outputs="json",
    allow_flagging="never"
)

iface.launch(server_name="0.0.0.0", server_port=7860)
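
Once the Space is running, the JSON endpoint can also be called programmatically. Below is a minimal sketch using the gradio_client package; the Space id is a placeholder, and it assumes the default "/predict" route that a single gr.Interface exposes.

# Sketch of a client call; "<user>/<space-name>" is a placeholder for the deployed Space id.
from gradio_client import Client

space = Client("<user>/<space-name>")
result = space.predict(
    "What is turbulence?",   # value for the "Scientific Question" textbox
    api_name="/predict"
)
print(result)  # JSON payload with question, answer, sources, confidence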