Adi362 committed on
Commit
14df17b
·
verified ·
1 Parent(s): 7fe20b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -28
app.py CHANGED
@@ -1,39 +1,55 @@
 
1
  import gradio as gr
2
  from qdrant_client import QdrantClient
3
  from sentence_transformers import SentenceTransformer
4
- from transformers import AutoTokenizer, AutoModelForCausalLM
5
- import torch
6
- import os
7
 
8
  QDRANT_URL = os.environ.get("QDRANT_URL")
9
  QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")
10
  COLLECTION_NAME = "well_vectors"
11
 
12
- QWEN_MODEL = "Qwen/Qwen2.5-0.5B-Instruct"
13
 
14
  embedder = SentenceTransformer("all-MiniLM-L6-v2")
15
 
 
16
  client = QdrantClient(
17
  url=QDRANT_URL,
18
  api_key=QDRANT_API_KEY
19
  )
20
 
21
- tokenizer = AutoTokenizer.from_pretrained(QWEN_MODEL)
22
- model = AutoModelForCausalLM.from_pretrained(
23
- QWEN_MODEL,
24
- torch_dtype=torch.float16,
25
- device_map="auto"
 
26
  )
27
 
28
- def scientific_query_api(question: str):
29
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  qvec = embedder.encode(question, normalize_embeddings=True)
31
 
 
32
  concepts = client.search(
33
  collection_name=COLLECTION_NAME,
34
  query_vector=qvec,
35
  filter={
36
- "must": [{"key": "type", "match": {"value": "concept"}}]
 
 
37
  },
38
  limit=1
39
  )
@@ -48,12 +64,14 @@ def scientific_query_api(question: str):
48
 
49
  concept = concepts[0]
50
 
 
51
  evidence = client.search(
52
  collection_name=COLLECTION_NAME,
53
  query_vector=concept.vector,
54
  limit=5
55
  )
56
 
 
57
  packet = []
58
  packet.append("Concept definition:")
59
  packet.append(concept.payload["content"])
@@ -64,36 +82,35 @@ def scientific_query_api(question: str):
64
  for e in evidence:
65
  if "dataset" in e.payload:
66
  packet.append(
67
- f"- Dataset: {e.payload['dataset']}, File: {e.payload.get('file','')}"
 
68
  )
69
  sources.add(f"The Well: {e.payload['dataset']}")
70
 
71
  evidence_text = "\n".join(packet)
72
 
73
- prompt = f"""
74
- You are a scientific formatter.
75
-
76
- Rules:
77
- - Use ONLY the information below.
78
- - Do NOT add facts or interpretations.
79
- - Preserve scientific meaning.
80
 
81
  INFORMATION:
82
  {evidence_text}
 
 
83
  """
84
 
85
- inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
86
- outputs = model.generate(
87
- **inputs,
88
- max_new_tokens=300,
89
- do_sample=False
 
 
90
  )
91
 
92
- answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
93
 
94
  return {
95
  "question": question,
96
- "answer": answer.strip(),
97
  "sources": sorted(sources),
98
  "confidence": "grounded"
99
  }
@@ -101,8 +118,7 @@ INFORMATION:
101
  iface = gr.Interface(
102
  fn=scientific_query_api,
103
  inputs=gr.Textbox(label="Scientific Question"),
104
- outputs="json",
105
- allow_flagging="never"
106
  )
107
 
108
  iface.launch(server_name="0.0.0.0", server_port=7860)
 
1
+ import os
2
  import gradio as gr
3
  from qdrant_client import QdrantClient
4
  from sentence_transformers import SentenceTransformer
5
+ from llama_cpp import Llama
6
+
 
7
 
8
# --- Configuration ---------------------------------------------------------
# Qdrant connection settings come from the environment; COLLECTION_NAME is
# fixed to the collection this app indexes into.
QDRANT_URL = os.environ.get("QDRANT_URL")
QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")
COLLECTION_NAME = "well_vectors"

# Path to the GGUF model used by llama.cpp. Overridable via the MODEL_PATH
# environment variable; defaults to the original hard-coded location so
# existing deployments keep working unchanged.
MODEL_PATH = os.environ.get("MODEL_PATH", "/model.gguf")

# Sentence embedder used to vectorize incoming questions.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Remote Qdrant vector-store client (credentials from the environment).
client = QdrantClient(
    url=QDRANT_URL,
    api_key=QDRANT_API_KEY,
)

# Local llama.cpp model used to format retrieved evidence into an answer.
# n_ctx/n_threads/n_batch are sized for a small CPU-only Space.
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=4096,
    n_threads=2,
    n_batch=128,
    verbose=False,
)

SYSTEM_PROMPT = """You are a scientific formatter.

Rules:
- You may ONLY use the provided information.
- Do NOT add facts, examples, or interpretations.
- Do NOT speculate.
- Preserve scientific meaning exactly.
- If information is insufficient, say so explicitly.
"""
40
+
41
+ def scientific_query_api(question: str):
42
+ # 1. Embed query
43
  qvec = embedder.encode(question, normalize_embeddings=True)
44
 
45
+ # 2. Concept retrieval
46
  concepts = client.search(
47
  collection_name=COLLECTION_NAME,
48
  query_vector=qvec,
49
  filter={
50
+ "must": [
51
+ {"key": "type", "match": {"value": "concept"}}
52
+ ]
53
  },
54
  limit=1
55
  )
 
64
 
65
  concept = concepts[0]
66
 
67
+
68
  evidence = client.search(
69
  collection_name=COLLECTION_NAME,
70
  query_vector=concept.vector,
71
  limit=5
72
  )
73
 
74
+
75
  packet = []
76
  packet.append("Concept definition:")
77
  packet.append(concept.payload["content"])
 
82
  for e in evidence:
83
  if "dataset" in e.payload:
84
  packet.append(
85
+ f"- Dataset: {e.payload['dataset']}, "
86
+ f"File: {e.payload.get('file', '')}"
87
  )
88
  sources.add(f"The Well: {e.payload['dataset']}")
89
 
90
  evidence_text = "\n".join(packet)
91
 
92
+ prompt = f"""{SYSTEM_PROMPT}
 
 
 
 
 
 
93
 
94
  INFORMATION:
95
  {evidence_text}
96
+
97
+ Formatted explanation:
98
  """
99
 
100
+ output = llm(
101
+ prompt,
102
+ max_tokens=300,
103
+ temperature=0.2,
104
+ top_p=0.9,
105
+ repeat_penalty=1.1,
106
+ stop=["INFORMATION:", "Formatted explanation:"]
107
  )
108
 
109
+ answer = output["choices"][0]["text"].strip()
110
 
111
  return {
112
  "question": question,
113
+ "answer": answer,
114
  "sources": sorted(sources),
115
  "confidence": "grounded"
116
  }
 
118
# Gradio UI: one free-text question in, the grounded JSON payload out.
question_box = gr.Textbox(label="Scientific Question")

iface = gr.Interface(
    fn=scientific_query_api,
    inputs=question_box,
    outputs="json",
)

# Bind to all interfaces on the standard Spaces port.
iface.launch(server_name="0.0.0.0", server_port=7860)