Spaces:

RocketFarmStudios
/

CPS-Test-Mobile

Paused

App Files Files Community

Ali2206 committed on Apr 22, 2025

Commit

6e39ead

verified ·

1 Parent(s): affa0af

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -89

app.py CHANGED Viewed

@@ -34,28 +34,24 @@ from txagent.txagent import TxAgent
 # Constants
 MAX_TOKENS = 32768
-CHUNK_SIZE = 10000
 MAX_NEW_TOKENS = 2048
-MAX_BOOKINGS_PER_CHUNK = 5
-def file_hash(path: str) -> str:
-    with open(path, "rb") as f:
-        return hashlib.md5(f.read()).hexdigest()
 def clean_response(text: str) -> str:
     """Scrub a raw response string down to plain, markdown-friendly text.

     Steps, in order:
       1. Round-trip the text through UTF-8. If the strict decode fails
          (e.g. the input holds lone surrogates), re-encode with the
          ``replace`` error handler instead.
       2. Delete every ``[...]`` span (non-greedy, may cross newlines) and
          every standalone word ``None``.
       3. Collapse runs of three or more newlines into exactly two.
       4. Delete runs of characters other than word characters, whitespace
          and the punctuation ``# - * . , : ( )``.

     Args:
         text: The string to clean.

     Returns:
         The cleaned string with leading and trailing whitespace removed.
     """
     try:
         sanitized = text.encode('utf-8', 'surrogatepass').decode('utf-8')
     except UnicodeError:
         # Strict decode rejected the bytes; fall back to lossy replacement.
         sanitized = text.encode('utf-8', 'replace').decode('utf-8')

     # (pattern, replacement, flags) applied sequentially; order matters.
     substitutions = (
         (r"\[.*?\]|\bNone\b", "", re.DOTALL),
         (r"\n{3,}", "\n\n", 0),
         (r"[^\n#\-\*\w\s\.,:\(\)]+", "", 0),
     )
     for pattern, replacement, flags in substitutions:
         sanitized = re.sub(pattern, replacement, sanitized, flags=flags)

     return sanitized.strip()
 def estimate_tokens(text: str) -> int:
     """Return a rough token count for ``text``.

     The estimate is the character count divided by 3.5, rounded down.

     Args:
         text: The string whose size is being estimated.

     Returns:
         The estimated number of tokens as an ``int`` (0 for an empty string).
     """
     chars_per_token = 3.5
     # Floor division by a float yields a float (e.g. 2.0); cast so the
     # result matches the declared ``int`` return type.
     return int(len(text) // chars_per_token)
 def process_patient_data(df: pd.DataFrame) -> Dict[str, Any]:
     data = {
         'bookings': defaultdict(list),
@@ -66,7 +62,7 @@ def process_patient_data(df: pd.DataFrame) -> Dict[str, Any]:
         'doctors': set(),
         'timeline': []
     }
     df = df.sort_values('Interview Date')
     for booking, group in df.groupby('Booking Number'):
         for _, row in group.iterrows():
@@ -79,11 +75,11 @@ def process_patient_data(df: pd.DataFrame) -> Dict[str, Any]:
                 'response': str(row['Item Response']),
                 'notes': str(row['Description'])
             }
             data['bookings'][booking].append(entry)
             data['timeline'].append(entry)
             data['doctors'].add(entry['doctor'])
             form_lower = entry['form'].lower()
             if 'medication' in form_lower or 'drug' in form_lower:
                 data['medications'][entry['item']].append(entry)
@@ -93,9 +89,10 @@ def process_patient_data(df: pd.DataFrame) -> Dict[str, Any]:
                 data['tests'][entry['item']].append(entry)
             elif 'procedure' in form_lower or 'surgery' in form_lower:
                 data['procedures'][entry['item']].append(entry)
     return data
 def generate_analysis_prompt(patient_data: Dict[str, Any], bookings: List[str]) -> str:
     prompt_lines = [
         "### Patient Clinical Reasoning Task",
@@ -138,33 +135,14 @@ def generate_analysis_prompt(patient_data: Dict[str, Any], bookings: List[str])
     return "\n".join(prompt_lines)
-def chunk_bookings(patient_data: Dict[str, Any]) -> List[List[str]]:
-    all_bookings = list(patient_data['bookings'].keys())
-    booking_sizes = []
-    for booking in all_bookings:
-        entries = patient_data['bookings'][booking]
-        size = sum(estimate_tokens(str(e)) for e in entries)
-        booking_sizes.append((booking, size))
-    booking_sizes.sort(key=lambda x: x[1], reverse=True)
-    chunks = [[] for _ in range(3)]
-    chunk_sizes = [0, 0, 0]
-    for booking, size in booking_sizes:
-        min_chunk = chunk_sizes.index(min(chunk_sizes))
-        chunks[min_chunk].append(booking)
-        chunk_sizes[min_chunk] += size
-    return chunks
 def init_agent():
     default_tool_path = os.path.abspath("data/new_tool.json")
     target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
     if not os.path.exists(target_tool_path):
         shutil.copy(default_tool_path, target_tool_path)
     agent = TxAgent(
         model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
         rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
@@ -178,6 +156,7 @@ def init_agent():
     agent.init_model()
     return agent
 def analyze_with_agent(agent, prompt: str) -> str:
     try:
         response = ""
@@ -198,11 +177,11 @@ def analyze_with_agent(agent, prompt: str) -> str:
                 response += clean_response(result) + "\n"
             elif hasattr(result, 'content'):
                 response += clean_response(result.content) + "\n"
         return response.strip()
     except Exception as e:
         return f"Error in analysis: {str(e)}"
 def analyze(file):
     if not file:
         raise gr.Error("Please upload a file")
@@ -212,79 +191,65 @@ def analyze(file):
         patient_data = process_patient_data(df)
         all_bookings = list(patient_data['bookings'].keys())
-        # ✅ Build one full prompt with all bookings
-        prompt = generate_analysis_prompt(patient_data, all_bookings)
-        # ✅ Add holistic reasoning instruction to prompt
-        prompt += "\n\n" + "\n".join([
-            "**Please analyze the entire patient history across all bookings.**",
-            "Look for missed diagnoses, inconsistent notes across different doctors,",
-            "missing follow-ups, or any gaps in care delivery.",
-            "Provide detailed insight into what may have been overlooked."
-        ])
-        # ✅ Run the agent once over the full patient timeline
-        response = analyze_with_agent(agent, prompt)
-        # ✅ Wrap and save the result
-        full_report = f"# 🧠 Full Patient History Analysis\n\n{response}"
-        report_path = os.path.join(
-            report_dir, f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"
-        )
         with open(report_path, 'w') as f:
             f.write(full_report)
-        yield full_report, report_path
     except Exception as e:
         raise gr.Error(f"Error: {str(e)}")
 def create_ui(agent):
-    with gr.Blocks(theme=gr.themes.Soft(), title="Patient History Analyzer") as demo:
-        gr.Markdown("# 🏥 Patient History Analyzer")
-        with gr.Tabs():
-            with gr.TabItem("Analysis"):
-                with gr.Row():
-                    with gr.Column(scale=1):
-                        file_upload = gr.File(
-                            label="Upload Excel File",
-                            file_types=[".xlsx"],
-                            file_count="single"
-                        )
-                        analyze_btn = gr.Button("Analyze", variant="primary")
-                        status = gr.Markdown("Ready")
-                    with gr.Column(scale=2):
-                        output = gr.Markdown()
-                        report = gr.File(label="Download Report")
-            with gr.TabItem("Instructions"):
-                gr.Markdown("""
-                ## How to Use
-                1. Upload patient history Excel
-                2. Click Analyze
-                3. View/download report
-                **Required Columns:**
-                - Booking Number
-                - Interview Date
-                - Interviewer
-                - Form Name
-                - Form Item
-                - Item Response
-                - Description
-                """)
         analyze_btn.click(
             analyze,
-            inputs=file_upload,
-            outputs=[output, report]
         )
     return demo
 if __name__ == "__main__":
     try:
         agent = init_agent()
@@ -297,4 +262,4 @@ if __name__ == "__main__":
         )
     except Exception as e:
         print(f"Error: {str(e)}")
-        sys.exit(1)

 # Constants
 MAX_TOKENS = 32768
 MAX_NEW_TOKENS = 2048
 def clean_response(text: str) -> str:
     """Scrub a raw response string down to plain, markdown-friendly text.

     Steps, in order:
       1. Round-trip the text through UTF-8. If the strict decode fails
          (e.g. the input holds lone surrogates), re-encode with the
          ``replace`` error handler instead.
       2. Delete every ``[...]`` span (non-greedy, may cross newlines) and
          every standalone word ``None``.
       3. Collapse runs of three or more newlines into exactly two.
       4. Delete runs of characters other than word characters, whitespace
          and the punctuation ``# - * . , : ( )``.

     Args:
         text: The string to clean.

     Returns:
         The cleaned string with leading and trailing whitespace removed.
     """
     try:
         sanitized = text.encode('utf-8', 'surrogatepass').decode('utf-8')
     except UnicodeError:
         # Strict decode rejected the bytes; fall back to lossy replacement.
         sanitized = text.encode('utf-8', 'replace').decode('utf-8')

     # (pattern, replacement, flags) applied sequentially; order matters.
     substitutions = (
         (r"\[.*?\]|\bNone\b", "", re.DOTALL),
         (r"\n{3,}", "\n\n", 0),
         (r"[^\n#\-\*\w\s\.,:\(\)]+", "", 0),
     )
     for pattern, replacement, flags in substitutions:
         sanitized = re.sub(pattern, replacement, sanitized, flags=flags)

     return sanitized.strip()
 def estimate_tokens(text: str) -> int:
     """Return a rough token count for ``text``.

     The estimate is the character count divided by 3.5, rounded down.

     Args:
         text: The string whose size is being estimated.

     Returns:
         The estimated number of tokens as an ``int`` (0 for an empty string).
     """
     chars_per_token = 3.5
     # Floor division by a float yields a float (e.g. 2.0); cast so the
     # result matches the declared ``int`` return type.
     return int(len(text) // chars_per_token)
 def process_patient_data(df: pd.DataFrame) -> Dict[str, Any]:
     data = {
         'bookings': defaultdict(list),
         'doctors': set(),
         'timeline': []
     }
     df = df.sort_values('Interview Date')
     for booking, group in df.groupby('Booking Number'):
         for _, row in group.iterrows():
                 'response': str(row['Item Response']),
                 'notes': str(row['Description'])
             }
             data['bookings'][booking].append(entry)
             data['timeline'].append(entry)
             data['doctors'].add(entry['doctor'])
             form_lower = entry['form'].lower()
             if 'medication' in form_lower or 'drug' in form_lower:
                 data['medications'][entry['item']].append(entry)
                 data['tests'][entry['item']].append(entry)
             elif 'procedure' in form_lower or 'surgery' in form_lower:
                 data['procedures'][entry['item']].append(entry)
     return data
 def generate_analysis_prompt(patient_data: Dict[str, Any], bookings: List[str]) -> str:
     prompt_lines = [
         "### Patient Clinical Reasoning Task",
     return "\n".join(prompt_lines)
 def init_agent():
     default_tool_path = os.path.abspath("data/new_tool.json")
     target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
     if not os.path.exists(target_tool_path):
         shutil.copy(default_tool_path, target_tool_path)
     agent = TxAgent(
         model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
         rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
     agent.init_model()
     return agent
 def analyze_with_agent(agent, prompt: str) -> str:
     try:
         response = ""
                 response += clean_response(result) + "\n"
             elif hasattr(result, 'content'):
                 response += clean_response(result.content) + "\n"
         return response.strip()
     except Exception as e:
         return f"Error in analysis: {str(e)}"
 def analyze(file):
     if not file:
         raise gr.Error("Please upload a file")
         patient_data = process_patient_data(df)
         all_bookings = list(patient_data['bookings'].keys())
+        # Chunking logic based on estimated token limits
+        chunks = []
+        current_chunk = []
+        current_size = 0
+        for booking in all_bookings:
+            booking_entries = patient_data['bookings'][booking]
+            booking_prompt = generate_analysis_prompt(patient_data, [booking])
+            token_count = estimate_tokens(booking_prompt)
+            if current_size + token_count > MAX_TOKENS:
+                if current_chunk:
+                    chunks.append(current_chunk)
+                current_chunk = [booking]
+                current_size = token_count
+            else:
+                current_chunk.append(booking)
+                current_size += token_count
+        if current_chunk:
+            chunks.append(current_chunk)
+        chunk_responses = []
+        for chunk in chunks:
+            prompt = generate_analysis_prompt(patient_data, chunk) + "\n\n" + "\n".join([
+                "**Please analyze this part of the patient history.**",
+                "Focus on identifying patterns, issues, and possible missed opportunities."
+            ])
+            chunk_responses.append(analyze_with_agent(agent, prompt))
+        final_prompt = "\n\n".join(chunk_responses) + "\n\nSummarize the key insights, missed diagnoses, medication issues, inconsistencies and follow-up recommendations in a clear and structured way."
+        final_response = analyze_with_agent(agent, final_prompt)
+        full_report = f"# \U0001f9e0 Full Patient History Analysis\n\n{final_response}"
+        report_path = os.path.join(report_dir, f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
         with open(report_path, 'w') as f:
             f.write(full_report)
+        return [("user", "[Excel Uploaded: Processing Analysis...]"), ("assistant", full_report)], report_path
     except Exception as e:
         raise gr.Error(f"Error: {str(e)}")
 def create_ui(agent):
+    with gr.Blocks(title="Patient History Chat") as demo:
+        chatbot = gr.Chatbot(label="Clinical Assistant", show_copy_button=True)
+        file_upload = gr.File(label="Upload Excel File", file_types=[".xlsx"])
+        analyze_btn = gr.Button("🧠 Analyze Patient History")
+        report_output = gr.File(label="Download Report")
         analyze_btn.click(
             analyze,
+            inputs=[file_upload],
+            outputs=[chatbot, report_output]
         )
     return demo
 if __name__ == "__main__":
     try:
         agent = init_agent()
         )
     except Exception as e:
         print(f"Error: {str(e)}")
+        sys.exit(1)