Spaces:
Sleeping
Sleeping
saving changes for summary gen
Browse files
.gitignore
CHANGED
|
@@ -4,3 +4,6 @@ tools/__pycache__/retriever_tool.cpython-312.pyc
|
|
| 4 |
tools/__pycache__/search_tool.cpython-312.pyc
|
| 5 |
__pycache__/agent.cpython-312.pyc
|
| 6 |
data/validation_set_surgery_with_web.numbers
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
tools/__pycache__/search_tool.cpython-312.pyc
|
| 5 |
__pycache__/agent.cpython-312.pyc
|
| 6 |
data/validation_set_surgery_with_web.numbers
|
| 7 |
+
__pycache__/
|
| 8 |
+
*.pyc
|
| 9 |
+
debug.log
|
__pycache__/agent.cpython-312.pyc
DELETED
|
Binary file (12.2 kB)
|
|
|
tools/__pycache__/retriever_tool.cpython-312.pyc
DELETED
|
Binary file (7.48 kB)
|
|
|
tools/retriever_tool.py
CHANGED
|
@@ -83,6 +83,36 @@ class DocumentRetriever:
|
|
| 83 |
faiss.normalize_L2(batch_embeddings)
|
| 84 |
self.index.add(np.array(batch_embeddings))
|
| 85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
def query(self, question, include_metadata=True):
|
| 87 |
try:
|
| 88 |
q_embedding = self.model.encode([question])
|
|
@@ -98,7 +128,9 @@ class DocumentRetriever:
|
|
| 98 |
|
| 99 |
if include_metadata and idx < len(self.metadata):
|
| 100 |
meta = self.metadata[idx]
|
| 101 |
-
|
|
|
|
|
|
|
| 102 |
else:
|
| 103 |
doc_info = f"[Document {i+1}] (Score: {score:.2f})\n\n{doc_text}"
|
| 104 |
|
|
|
|
| 83 |
faiss.normalize_L2(batch_embeddings)
|
| 84 |
self.index.add(np.array(batch_embeddings))
|
| 85 |
|
| 86 |
+
# def query(self, question, include_metadata=True):
|
| 87 |
+
# try:
|
| 88 |
+
# q_embedding = self.model.encode([question])
|
| 89 |
+
# faiss.normalize_L2(q_embedding)
|
| 90 |
+
|
| 91 |
+
# k = min(self.top_k * 2, len(self.texts))
|
| 92 |
+
# scores, indices = self.index.search(np.array(q_embedding), k)
|
| 93 |
+
|
| 94 |
+
# results = []
|
| 95 |
+
# for i, (score, idx) in enumerate(zip(scores[0], indices[0])):
|
| 96 |
+
# if idx != -1 and score >= self.similarity_threshold and i < self.top_k:
|
| 97 |
+
# doc_text = self.texts[idx]
|
| 98 |
+
|
| 99 |
+
# if include_metadata and idx < len(self.metadata):
|
| 100 |
+
# meta = self.metadata[idx]
|
| 101 |
+
# doc_info = f"[Document {i+1}] (Score: {score:.2f}, Specialty: {meta.get('medical_specialty', 'Unknown')}, Sample: {meta.get('sample_name', 'Unknown')})\n\n{doc_text}"
|
| 102 |
+
# else:
|
| 103 |
+
# doc_info = f"[Document {i+1}] (Score: {score:.2f})\n\n{doc_text}"
|
| 104 |
+
|
| 105 |
+
# results.append(doc_info)
|
| 106 |
+
|
| 107 |
+
# gc.collect()
|
| 108 |
+
|
| 109 |
+
# if not results:
|
| 110 |
+
# return "No relevant documents found for this query."
|
| 111 |
+
|
| 112 |
+
# return "\n\n" + "-"*80 + "\n\n".join(results)
|
| 113 |
+
# except Exception as e:
|
| 114 |
+
# return f"Error during retrieval: {str(e)}"
|
| 115 |
+
|
| 116 |
def query(self, question, include_metadata=True):
|
| 117 |
try:
|
| 118 |
q_embedding = self.model.encode([question])
|
|
|
|
| 128 |
|
| 129 |
if include_metadata and idx < len(self.metadata):
|
| 130 |
meta = self.metadata[idx]
|
| 131 |
+
# Add description to the output
|
| 132 |
+
description = meta.get('description', 'No description available')
|
| 133 |
+
doc_info = f"[Document {i+1}] (Score: {score:.2f})\nSpecialty: {meta.get('medical_specialty', 'Unknown')}\nSample: {meta.get('sample_name', 'Unknown')}\nDescription: {description}\n\n{doc_text}"
|
| 134 |
else:
|
| 135 |
doc_info = f"[Document {i+1}] (Score: {score:.2f})\n\n{doc_text}"
|
| 136 |
|