Spaces:

atrmkj
/

medTranscript_QA_agent

Sleeping

App Files Community

atrmkj committed on Apr 30, 2025

Commit

c27cd83

1 Parent(s): a5ddb69

saving changes for summary gen

Browse files

Files changed (4) hide show

.gitignore +3 -0
__pycache__/agent.cpython-312.pyc +0 -0
tools/__pycache__/retriever_tool.cpython-312.pyc +0 -0
tools/retriever_tool.py +33 -1

.gitignore CHANGED Viewed

@@ -4,3 +4,6 @@ tools/__pycache__/retriever_tool.cpython-312.pyc
 tools/__pycache__/search_tool.cpython-312.pyc
 __pycache__/agent.cpython-312.pyc
 data/validation_set_surgery_with_web.numbers

 tools/__pycache__/search_tool.cpython-312.pyc
 __pycache__/agent.cpython-312.pyc
 data/validation_set_surgery_with_web.numbers
+__pycache__/
+*.pyc
+debug.log

__pycache__/agent.cpython-312.pyc DELETED Viewed

Binary file (12.2 kB)

tools/__pycache__/retriever_tool.cpython-312.pyc DELETED Viewed

Binary file (7.48 kB)

tools/retriever_tool.py CHANGED Viewed

@@ -83,6 +83,36 @@ class DocumentRetriever:
             faiss.normalize_L2(batch_embeddings)
             self.index.add(np.array(batch_embeddings))
     def query(self, question, include_metadata=True):
         try:
             q_embedding = self.model.encode([question])
@@ -98,7 +128,9 @@ class DocumentRetriever:
                     if include_metadata and idx < len(self.metadata):
                         meta = self.metadata[idx]
-                        doc_info = f"[Document {i+1}] (Score: {score:.2f}, Specialty: {meta.get('medical_specialty', 'Unknown')}, Sample: {meta.get('sample_name', 'Unknown')})\n\n{doc_text}"
                     else:
                         doc_info = f"[Document {i+1}] (Score: {score:.2f})\n\n{doc_text}"

             faiss.normalize_L2(batch_embeddings)
             self.index.add(np.array(batch_embeddings))
+    # def query(self, question, include_metadata=True):
+    #     try:
+    #         q_embedding = self.model.encode([question])
+    #         faiss.normalize_L2(q_embedding)
+    #         k = min(self.top_k * 2, len(self.texts))
+    #         scores, indices = self.index.search(np.array(q_embedding), k)
+    #         results = []
+    #         for i, (score, idx) in enumerate(zip(scores[0], indices[0])):
+    #             if idx != -1 and score >= self.similarity_threshold and i < self.top_k:
+    #                 doc_text = self.texts[idx]
+    #                 if include_metadata and idx < len(self.metadata):
+    #                     meta = self.metadata[idx]
+    #                     doc_info = f"[Document {i+1}] (Score: {score:.2f}, Specialty: {meta.get('medical_specialty', 'Unknown')}, Sample: {meta.get('sample_name', 'Unknown')})\n\n{doc_text}"
+    #                 else:
+    #                     doc_info = f"[Document {i+1}] (Score: {score:.2f})\n\n{doc_text}"
+    #                 results.append(doc_info)
+    #         gc.collect()
+    #         if not results:
+    #             return "No relevant documents found for this query."
+    #         return "\n\n" + "-"*80 + "\n\n".join(results)
+    #     except Exception as e:
+    #         return f"Error during retrieval: {str(e)}"
     def query(self, question, include_metadata=True):
         try:
             q_embedding = self.model.encode([question])
                     if include_metadata and idx < len(self.metadata):
                         meta = self.metadata[idx]
+                        # Add description to the output
+                        description = meta.get('description', 'No description available')
+                        doc_info = f"[Document {i+1}] (Score: {score:.2f})\nSpecialty: {meta.get('medical_specialty', 'Unknown')}\nSample: {meta.get('sample_name', 'Unknown')}\nDescription: {description}\n\n{doc_text}"
                     else:
                         doc_info = f"[Document {i+1}] (Score: {score:.2f})\n\n{doc_text}"