Spaces:

RocketFarmStudios
/

CPS-Test-Mobile

Paused

App Files Files Community

Ali2206 committed on Apr 22

Commit

d16299c

verified ·

1 Parent(s): 70e5ae6

Update app.py

Browse files

Files changed (1) hide show

app.py +320 -292

app.py CHANGED Viewed

@@ -1,311 +1,262 @@
 import sys
 import os
 import pandas as pd
 import gradio as gr
-import re
 import hashlib
 import shutil
 from datetime import datetime
 from collections import defaultdict
-from typing import List, Dict, Tuple
-# Configuration
-WORKING_DIR = os.getcwd()
-REPORT_DIR = os.path.join(WORKING_DIR, "reports")
-os.makedirs(REPORT_DIR, exist_ok=True)
-# Model configuration
-MODEL_CACHE_DIR = os.path.join(WORKING_DIR, "model_cache")
-os.makedirs(MODEL_CACHE_DIR, exist_ok=True)
-os.environ["HF_HOME"] = MODEL_CACHE_DIR
-os.environ["TRANSFORMERS_CACHE"] = MODEL_CACHE_DIR
-# Import TxAgent after setting up environment
-sys.path.append(os.path.join(WORKING_DIR, "src"))
 from txagent.txagent import TxAgent
-class PatientHistoryAnalyzer:
-    def __init__(self):
-        self.max_token_length = 2000
-        self.max_text_length = 500
-        self.agent = self._initialize_agent()
-    def _initialize_agent(self):
-        """Initialize the TxAgent with proper configuration"""
-        tool_path = os.path.join(WORKING_DIR, "data", "new_tool.json")
-        if not os.path.exists(tool_path):
-            raise FileNotFoundError(f"Tool file not found at {tool_path}")
-        return TxAgent(
-            model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
-            rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
-            tool_files_dict={"new_tool": tool_path},
-            force_finish=True,
-            enable_checker=True,
-            step_rag_num=4,
-            seed=100,
-            additional_default_tools=[],
-        )
-    def clean_text(self, text: str) -> str:
-        """Clean and normalize text fields"""
-        if not isinstance(text, str):
-            text = str(text)
-        text = re.sub(r'\s+', ' ', text).strip()
-        return text[:self.max_text_length]
-    def process_excel(self, file_path: str) -> Dict[str, List]:
-        """Process Excel file into structured patient data"""
-        try:
-            df = pd.read_excel(file_path)
-            df = df.sort_values('Interview Date')
-            data = {
-                'timeline': [],
-                'medications': defaultdict(list),
-                'diagnoses': defaultdict(list),
-                'tests': defaultdict(list),
-                'doctors': set(),
-                'all_entries': []
             }
-            for _, row in df.iterrows():
-                entry = {
-                    'date': self.clean_text(row.get('Interview Date', '')),
-                    'doctor': self.clean_text(row.get('Interviewer', '')),
-                    'form': self.clean_text(row.get('Form Name', '')),
-                    'item': self.clean_text(row.get('Form Item', '')),
-                    'response': self.clean_text(row.get('Item Response', '')),
-                    'notes': self.clean_text(row.get('Description', ''))
-                }
-                data['timeline'].append(entry)
-                data['doctors'].add(entry['doctor'])
-                data['all_entries'].append(entry)
-                form_lower = entry['form'].lower()
-                if 'medication' in form_lower or 'drug' in form_lower:
-                    data['medications'][entry['item']].append(entry)
-                elif 'diagnosis' in form_lower:
-                    data['diagnoses'][entry['item']].append(entry)
-                elif 'test' in form_lower or 'lab' in form_lower:
-                    data['tests'][entry['item']].append(entry)
-            return data
-        except Exception as e:
-            raise ValueError(f"Error processing Excel file: {str(e)}")
-    def generate_analysis_prompt(self, patient_data: Dict) -> List[Dict]:
-        """Generate analysis prompts that respect token limits"""
-        prompts = []
-        # Current Status Prompt
-        current_prompt = self._create_current_status_prompt(patient_data)
-        prompts.append({
-            'type': 'current_status',
-            'content': current_prompt
-        })
-        # Historical Analysis Prompt
-        if len(patient_data['all_entries']) > 10:
-            history_prompt = self._create_historical_prompt(patient_data)
-            prompts.append({
-                'type': 'historical',
-                'content': history_prompt
-            })
-        # Medication-Specific Prompt
-        if len(patient_data['medications']) > 3:
-            meds_prompt = self._create_medication_prompt(patient_data)
-            prompts.append({
-                'type': 'medications',
-                'content': meds_prompt
-            })
-        return prompts
-    def _create_current_status_prompt(self, data: Dict) -> str:
-        """Create prompt for current patient status"""
-        recent_entries = data['timeline'][-10:]
-        prompt_lines = [
-            "**Comprehensive Patient Status Analysis**",
-            "Focus on RECENT appointments and CURRENT health status.",
-            "Analyze for:",
-            "- Medication consistency",
-            "- Diagnostic agreement between providers",
-            "- Recent concerning findings",
-            "- Immediate follow-up needs",
-            "",
-            "**Recent Timeline (last 10 entries):**"
-        ]
-        for entry in recent_entries:
-            prompt_lines.append(
-                f"- {entry['date']}: {entry['form']} - {entry['item']} = {entry['response']} (by {entry['doctor']})"
-            )
-        prompt_lines.extend([
-            "",
-            "**Current Medications:**",
-            *[f"- {med}: {entries[-1]['response']} (last updated {entries[-1]['date']})"
-              for med, entries in data['medications'].items()],
-            "",
-            "**Active Diagnoses:**",
-            *[f"- {diag}: {entries[-1]['response']} (last updated {entries[-1]['date']})"
-              for diag, entries in data['diagnoses'].items()],
-            "",
-            "**Required Output Format:**",
-            "### Summary of Current Status",
-            "### Medication Review",
-            "### Diagnostic Consistency",
-            "### Urgent Concerns",
-            "### Recommended Actions"
-        ])
-        return "\n".join(prompt_lines)
-    def _create_historical_prompt(self, data: Dict) -> str:
-        """Create prompt for historical analysis"""
-        return "\n".join([
-            "**Historical Patient Analysis**",
-            "Focus on LONG-TERM PATTERNS and HISTORY.",
-            "",
-            "**Key Analysis Points:**",
-            "- Treatment changes over time",
-            "- Recurring symptoms/issues",
-            "- Diagnostic evolution",
-            "- Medication history",
-            "",
-            "**Historical Timeline (condensed):**",
-            *[f"- {entry['date'][:7]}: {entry['form']} - {entry['response']}"
-              for entry in data['all_entries'][:-10]],
-            "",
-            "**Required Output Format:**",
-            "### Historical Patterns",
-            "### Treatment Evolution",
-            "### Chronic Issues",
-            "### Long-term Recommendations"
-        ])
-    def _create_medication_prompt(self, data: Dict) -> str:
-        """Create medication-specific prompt"""
-        return "\n".join([
-            "**Medication-Specific Analysis**",
-            "Focus on MEDICATION HISTORY and POTENTIAL ISSUES.",
-            "",
-            "**Medication History:**",
-            *[f"- {med}: " + ", ".join(
-                f"{e['date']}: {e['response']} (by {e['doctor']})"
-                for e in entries
-              ) for med, entries in data['medications'].items()],
-            "",
-            "**Analysis Focus:**",
-            "- Potential interactions",
-            "- Dosage changes",
-            "- Prescriber patterns",
-            "- Adherence issues",
-            "",
-            "**Required Output Format:**",
-            "### Medication Summary",
-            "### Potential Issues",
-            "### Prescriber Patterns",
-            "### Recommendations"
-        ])
-    def _call_agent(self, prompt: str) -> str:
-        """Call TxAgent with proper error handling"""
-        try:
-            response = ""
-            for result in self.agent.run_gradio_chat(
-                message=prompt,
-                history=[],
-                temperature=0.2,
-                max_new_tokens=1024,
-                max_token=2048,
-                call_agent=False,
-                conversation=[],
-            ):
-                if isinstance(result, list):
-                    for r in result:
-                        if hasattr(r, 'content') and r.content:
-                            response += r.content + "\n"
-                elif isinstance(result, str):
-                    response += result + "\n"
-            return response.strip()
-        except Exception as e:
-            return f"Error in model response: {str(e)}"
-    def generate_report(self, analysis_results: List[str]) -> Tuple[str, str]:
-        """Combine analysis results into final report"""
-        report = [
-            "# Comprehensive Patient History Analysis",
-            f"**Generated on**: {datetime.now().strftime('%Y-%m-%d %H:%M')}",
-            ""
-        ]
-        for result in analysis_results:
-            report.extend(["", "---", "", result])
-        report.extend([
-            "",
-            "## Overall Clinical Summary",
-            "This report combines analyses of:",
-            "- Current health status",
-            "- Historical patterns",
-            "- Medication history",
-            "",
-            "**Key Takeaways:**",
-            "[Generated summary of most critical findings would appear here]"
-        ])
-        full_report = "\n".join(report)
-        # Save to file in working directory
-        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        report_filename = f"patient_report_{timestamp}.md"
-        report_path = os.path.join(REPORT_DIR, report_filename)
-        with open(report_path, 'w') as f:
-            f.write(full_report)
-        return full_report, report_path
-    def analyze(self, file_path: str) -> Tuple[str, str]:
-        """Main analysis workflow"""
-        try:
-            patient_data = self.process_excel(file_path)
-            prompts = self.generate_analysis_prompt(patient_data)
-            # Call TxAgent for each prompt
-            analysis_results = []
-            for prompt in prompts:
-                response = self._call_agent(prompt['content'])
-                analysis_results.append(response)
-            return self.generate_report(analysis_results)
-        except Exception as e:
-            return f"Error during analysis: {str(e)}", ""
-def create_interface():
-    analyzer = PatientHistoryAnalyzer()
-    with gr.Blocks(title="Patient History Analyzer", theme=gr.themes.Soft()) as demo:
         gr.Markdown("# 🏥 Comprehensive Patient History Analysis")
-        with gr.Tabs():
             with gr.TabItem("Analysis"):
                 with gr.Row():
                     with gr.Column(scale=1):
-                        file_input = gr.File(
-                            label="Upload Patient Records (Excel)",
                             file_types=[".xlsx"],
-                            type="filepath"
                         )
-                        analyze_btn = gr.Button("Analyze Full History", variant="primary")
                     with gr.Column(scale=2):
                         output_display = gr.Markdown(
@@ -321,25 +272,96 @@ def create_interface():
                 gr.Markdown("""
                 ## How to Use This Tool
-                1. **Upload** your patient's Excel file
-                2. **Click Analyze** to process the history
-                3. **Review** the comprehensive analysis
-                4. **Download** the full report
-                ### File Requirements
-                Excel file must contain:
                 - Booking Number
                 - Form Name
-                - Form Item
                 - Item Response
-                - Interview Date
-                - Interviewer
                 - Description
                 """)
-        analyze_btn.click(
-            fn=analyzer.analyze,
-            inputs=file_input,
             outputs=[output_display, report_download],
             api_name="analyze"
         )
@@ -348,13 +370,19 @@ def create_interface():
 if __name__ == "__main__":
     try:
-        demo = create_interface()
-        demo.launch(
             server_name="0.0.0.0",
             server_port=7860,
             show_error=True,
-            allowed_paths=[WORKING_DIR, REPORT_DIR]
         )
     except Exception as e:
-        print(f"Error launching application: {str(e)}")
         sys.exit(1)

 import sys
 import os
 import pandas as pd
+import json
 import gradio as gr
+from typing import List, Tuple, Dict, Any
 import hashlib
 import shutil
+import re
 from datetime import datetime
+import time
 from collections import defaultdict
+# Configuration and setup
+# Every cache directory and the report directory live under this single root.
+persistent_dir = "/data/hf_cache"
+os.makedirs(persistent_dir, exist_ok=True)
+model_cache_dir = os.path.join(persistent_dir, "txagent_models")
+tool_cache_dir = os.path.join(persistent_dir, "tool_cache")
+file_cache_dir = os.path.join(persistent_dir, "cache")
+report_dir = os.path.join(persistent_dir, "reports")
+# Create all sub-directories up front so later code can write without checking.
+for directory in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir]:
+    os.makedirs(directory, exist_ok=True)
+# Both Hugging Face cache variables are pointed at model_cache_dir. This runs
+# before the txagent import that follows this block.
+os.environ["HF_HOME"] = model_cache_dir
+os.environ["TRANSFORMERS_CACHE"] = model_cache_dir
+# Put the "src" directory that sits next to this file at the front of the
+# import path (presumably where the txagent package lives - confirm).
+current_dir = os.path.dirname(os.path.abspath(__file__))
+src_path = os.path.abspath(os.path.join(current_dir, "src"))
+sys.path.insert(0, src_path)
 from txagent.txagent import TxAgent
+# Constants
+# MAX_TOKENS is forwarded as max_token to agent.run_gradio_chat.
+MAX_TOKENS = 32768  # TxAgent's maximum token limit
+# chunk_patient_data compares the summed estimate_tokens() of bookings to this.
+CHUNK_SIZE = 3000  # Target chunk size to stay under token limit
+# MAX_NEW_TOKENS is forwarded as max_new_tokens to agent.run_gradio_chat.
+MAX_NEW_TOKENS = 1024
+def file_hash(path: str) -> str:
+    """Return the hexadecimal MD5 digest of the file stored at *path*.
+
+    The file is opened in binary mode and read in full before hashing.
+    """
+    with open(path, "rb") as handle:
+        payload = handle.read()
+    digest = hashlib.md5(payload)
+    return digest.hexdigest()
+def clean_response(text: str) -> str:
+    """Normalize model output for display.
+
+    Steps, in order: round-trip the text through UTF-8 (falling back to
+    replacement characters when the strict round-trip fails), remove
+    square-bracketed spans and the standalone word "None", collapse runs of
+    three or more newlines into two, drop every character outside the
+    allowed set, and strip surrounding whitespace.
+    """
+    try:
+        normalized = text.encode('utf-8', 'surrogatepass').decode('utf-8')
+    except UnicodeError:
+        normalized = text.encode('utf-8', 'replace').decode('utf-8')
+    # (pattern, replacement, flags) applied one after another.
+    substitutions = (
+        (r"\[.*?\]|\bNone\b", "", re.DOTALL),
+        (r"\n{3,}", "\n\n", 0),
+        (r"[^\n#\-\*\w\s\.,:\(\)]+", "", 0),
+    )
+    for pattern, replacement, flags in substitutions:
+        normalized = re.sub(pattern, replacement, normalized, flags=flags)
+    return normalized.strip()
+def estimate_tokens(text: str) -> int:
+    """Roughly estimate how many tokens *text* occupies.
+
+    Uses the rule of thumb of four characters per token, rounding down.
+    """
+    chars_per_token = 4
+    whole_tokens, _ = divmod(len(text), chars_per_token)
+    return whole_tokens
+def _cell_text(value: Any) -> str:
+    """Convert one spreadsheet cell to text, using '' for missing values."""
+    # Without this check an empty cell would become the literal text "nan"
+    # (or "NaT") and be shown to the model as if it were a real answer.
+    if pd.api.types.is_scalar(value) and pd.isna(value):
+        return ''
+    return str(value)
+def process_patient_data(df: pd.DataFrame) -> Dict[str, Any]:
+    """Process raw patient data into structured format.
+
+    Args:
+        df: One row per form item. Must contain the columns 'Booking Number',
+            'Interview Date', 'Interviewer', 'Form Name', 'Form Item',
+            'Item Response' and 'Description'.
+
+    Returns:
+        A dict with:
+        - 'bookings': booking -> list of entry dicts, ordered by date
+        - 'medications' / 'diagnoses' / 'tests': item -> list of entry dicts,
+          chosen by keywords found in the lower-cased form name
+        - 'doctors': set of interviewer names
+        - 'timeline': every entry, appended booking by booking
+
+    Raises:
+        ValueError: if any required column is absent.
+    """
+    required = (
+        'Booking Number', 'Interview Date', 'Interviewer', 'Form Name',
+        'Form Item', 'Item Response', 'Description',
+    )
+    missing = [column for column in required if column not in df.columns]
+    if missing:
+        raise ValueError(f"Missing required column(s): {', '.join(missing)}")
+    data = {
+        'bookings': defaultdict(list),
+        'medications': defaultdict(list),
+        'diagnoses': defaultdict(list),
+        'tests': defaultdict(list),
+        'doctors': set(),
+        'timeline': []
+    }
+    # Sort by date and group by booking
+    # NOTE(review): pandas groupby skips rows whose Booking Number is missing
+    # by default - confirm that dropping them is intended.
+    df = df.sort_values('Interview Date')
+    for booking, group in df.groupby('Booking Number'):
+        for _, row in group.iterrows():
+            entry = {
+                'booking': booking,
+                'date': _cell_text(row['Interview Date']),
+                'doctor': _cell_text(row['Interviewer']),
+                'form': _cell_text(row['Form Name']),
+                'item': _cell_text(row['Form Item']),
+                'response': _cell_text(row['Item Response']),
+                'notes': _cell_text(row['Description'])
             }
+            data['bookings'][booking].append(entry)
+            data['timeline'].append(entry)
+            data['doctors'].add(entry['doctor'])
+            # Categorize entries; the first matching keyword group wins.
+            form_lower = entry['form'].lower()
+            if 'medication' in form_lower or 'drug' in form_lower:
+                data['medications'][entry['item']].append(entry)
+            elif 'diagnosis' in form_lower:
+                data['diagnoses'][entry['item']].append(entry)
+            elif 'test' in form_lower or 'lab' in form_lower:
+                data['tests'][entry['item']].append(entry)
+    return data
+def _latest_per_item(grouped: Dict[str, Any], booking: str) -> List[str]:
+    """Build one "- item: response (as of date)" line per item seen in *booking*."""
+    lines = []
+    for name, entries in grouped.items():
+        matching = [e for e in entries if e['booking'] == booking]
+        if matching:
+            # Latest is decided by comparing the stored date strings.
+            latest = max(matching, key=lambda e: e['date'])
+            lines.append(f"- {name}: {latest['response']} (as of {latest['date']})")
+    return lines
+def generate_analysis_prompt(patient_data: Dict[str, Any], booking: str) -> str:
+    """Generate focused analysis prompt for a booking.
+
+    Args:
+        patient_data: Mapping with 'bookings', 'medications' and 'diagnoses'
+            keys, each mapping a name to a list of entry dicts (entries carry
+            'booking', 'date', 'doctor', 'form', 'item' and 'response').
+        booking: Key of the booking to describe.
+
+    Returns:
+        The prompt text: the booking's timeline, the latest response of each
+        medication and diagnosis recorded in that booking ("None recorded"
+        when there are none), the analysis instructions and the required
+        output format.
+    """
+    booking_entries = patient_data['bookings'][booking]
+    # Build timeline string
+    timeline = "\n".join(
+        f"- {entry['date']}: {entry['form']} - {entry['item']} = {entry['response']} (by {entry['doctor']})"
+        for entry in booking_entries
+    )
+    current_meds = _latest_per_item(patient_data['medications'], booking)
+    current_diags = _latest_per_item(patient_data['diagnoses'], booking)
+    # Joined outside the f-string: a backslash inside an f-string replacement
+    # field is a SyntaxError on Python versions before 3.12.
+    meds_text = "\n".join(current_meds) if current_meds else "None recorded"
+    diags_text = "\n".join(current_diags) if current_diags else "None recorded"
+    prompt = f"""
+**Comprehensive Patient Analysis - Booking {booking}**
+**Patient Timeline:**
+{timeline}
+**Current Medications:**
+{meds_text}
+**Current Diagnoses:**
+{diags_text}
+**Analysis Instructions:**
+1. Review the patient's complete history across all visits
+2. Identify any potential missed diagnoses based on symptoms and test results
+3. Check for medication conflicts or inappropriate prescriptions
+4. Note any incomplete assessments or missing tests
+5. Flag any urgent follow-up needs
+6. Compare findings across different doctors for consistency
+**Required Output Format:**
+### Missed Diagnoses
+[Potential diagnoses that were not identified]
+### Medication Issues
+[Conflicts, side effects, inappropriate prescriptions]
+### Assessment Gaps
+[Missing tests or incomplete evaluations]
+### Follow-up Recommendations
+[Urgent and non-urgent follow-up needs]
+### Doctor Consistency
+[Discrepancies between different providers]
+"""
+    return prompt
+def chunk_patient_data(patient_data: Dict[str, Any]) -> List[Dict[str, Any]]:
+    """Split patient data into manageable chunks.
+
+    Bookings are packed in iteration order. A new chunk is started when
+    adding the next booking would push the running estimate_tokens() total
+    past CHUNK_SIZE. A single booking larger than CHUNK_SIZE still gets a
+    chunk of its own.
+
+    Args:
+        patient_data: Mapping with 'bookings' (booking -> entries) plus
+            'medications' and 'diagnoses' (name -> entries carrying a
+            'booking' key).
+
+    Returns:
+        A list of plain dicts, each with 'bookings', 'medications' and
+        'diagnoses' restricted to the bookings held by that chunk.
+    """
+    def new_chunk():
+        # Each section needs its own mapping. A bare defaultdict(list) would
+        # make chunk['bookings'] a list, and indexing it by booking fails.
+        return {
+            'bookings': {},
+            'medications': defaultdict(list),
+            'diagnoses': defaultdict(list),
+        }
+    def freeze(chunk):
+        # Hand callers ordinary dicts so lookups never create keys.
+        return {section: dict(items) for section, items in chunk.items()}
+    chunks = []
+    current_chunk = new_chunk()
+    current_size = 0
+    for booking, entries in patient_data['bookings'].items():
+        booking_size = sum(estimate_tokens(str(e)) for e in entries)
+        if current_chunk['bookings'] and current_size + booking_size > CHUNK_SIZE:
+            chunks.append(freeze(current_chunk))
+            current_chunk = new_chunk()
+            current_size = 0
+        current_chunk['bookings'][booking] = entries
+        current_size += booking_size
+        # Add related data
+        for section in ('medications', 'diagnoses'):
+            for name, records in patient_data[section].items():
+                related = [e for e in records if e['booking'] == booking]
+                if related:
+                    current_chunk[section][name].extend(related)
+    if current_chunk['bookings']:
+        chunks.append(freeze(current_chunk))
+    return chunks
+def init_agent():
+    """Create the TxAgent, load its model and return it.
+
+    The tool definition file is copied into the tool cache directory the
+    first time it is needed, and the agent is configured to read it from
+    there.
+    """
+    source_tools = os.path.abspath("data/new_tool.json")
+    cached_tools = os.path.join(tool_cache_dir, "new_tool.json")
+    already_cached = os.path.exists(cached_tools)
+    if not already_cached:
+        shutil.copy(source_tools, cached_tools)
+    settings = dict(
+        model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
+        rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
+        tool_files_dict={"new_tool": cached_tools},
+        force_finish=True,
+        enable_checker=True,
+        step_rag_num=4,
+        seed=100,
+        additional_default_tools=[],
+    )
+    txagent = TxAgent(**settings)
+    txagent.init_model()
+    return txagent
+def analyze_with_agent(agent, prompt: str) -> str:
+    """Run one prompt through the agent and return the cleaned text.
+
+    Args:
+        agent: Object exposing run_gradio_chat(...), iterated for results.
+        prompt: Message sent to the agent. No history or prior conversation
+            is passed, so every call is independent.
+
+    Returns:
+        The cleaned pieces joined by newlines, or a string starting with
+        "Error in analysis: " if anything raised.
+    """
+    try:
+        pieces = []
+        # NOTE(review): if run_gradio_chat yields the cumulative history on
+        # each step, earlier content will be repeated here - confirm.
+        for result in agent.run_gradio_chat(
+            message=prompt,
+            history=[],
+            temperature=0.2,
+            max_new_tokens=MAX_NEW_TOKENS,
+            max_token=MAX_TOKENS,
+            call_agent=False,
+            conversation=[],
+        ):
+            if isinstance(result, list):
+                contents = [getattr(r, 'content', None) for r in result]
+            elif isinstance(result, str):
+                contents = [result]
+            else:
+                contents = [getattr(result, 'content', None)]
+            # Skip empty or missing content in every branch. A None content
+            # used to raise inside clean_response and discard the whole answer.
+            pieces.extend(clean_response(c) for c in contents if c)
+        return "\n".join(pieces).strip()
+    except Exception as e:
+        return f"Error in analysis: {str(e)}"
+def create_ui(agent):
+    with gr.Blocks(theme=gr.themes.Soft(), title="Patient History Analyzer") as demo:
         gr.Markdown("# 🏥 Comprehensive Patient History Analysis")
+        with gr.Tabs():
             with gr.TabItem("Analysis"):
                 with gr.Row():
                     with gr.Column(scale=1):
+                        file_upload = gr.File(
+                            label="Upload Patient Excel File",
                             file_types=[".xlsx"],
+                            file_count="single"
                         )
+                        analysis_btn = gr.Button("Analyze Patient History", variant="primary")
+                        status = gr.Markdown("Ready for analysis")
                     with gr.Column(scale=2):
                         output_display = gr.Markdown(
                 gr.Markdown("""
                 ## How to Use This Tool
+                1. **Upload Excel File**: Patient history Excel file
+                2. **Click Analyze**: System will process all bookings
+                3. **Review Results**: Comprehensive analysis appears
+                4. **Download Report**: Full report with all findings
+                ### Excel Requirements
+                Must contain these columns:
                 - Booking Number
+                - Interview Date
+                - Interviewer (Doctor)
                 - Form Name
+                - Form Item
                 - Item Response
                 - Description
+                ### Analysis Includes:
+                - Missed diagnoses across visits
+                - Medication conflicts over time
+                - Incomplete assessments
+                - Doctor consistency checks
+                - Follow-up recommendations
                 """)
+        def analyze_patient(file):
+            """Analyze every booking in the uploaded workbook, streaming progress.
+
+            Args:
+                file: The uploaded Excel file, either a path string or an
+                    object whose .name attribute is the path.
+
+            Yields:
+                (report_markdown, None) after each booking, then
+                (report_markdown, report_path) once the report is saved.
+
+            Raises:
+                gr.Error: if no file was given or any step fails.
+            """
+            if not file:
+                raise gr.Error("Please upload an Excel file first")
+            try:
+                # Process Excel file
+                excel_path = file if isinstance(file, str) else file.name
+                df = pd.read_excel(excel_path)
+                patient_data = process_patient_data(df)
+                # Generate and process prompts
+                full_report = []
+                findings = []
+                for booking in patient_data['bookings']:
+                    prompt = generate_analysis_prompt(patient_data, booking)
+                    response = analyze_with_agent(agent, prompt)
+                    section = f"## Booking {booking}\n{response}\n"
+                    # Failed bookings stay visible in the report instead of
+                    # vanishing, but are kept out of the summary input.
+                    full_report.append(section)
+                    if not response.startswith("Error in analysis"):
+                        findings.append(section)
+                    yield "\n".join(full_report), None
+                    time.sleep(0.1)  # Prevent UI freezing
+                # Generate overall summary
+                bookings_processed = len(findings)
+                if bookings_processed > 1:
+                    # Each agent call starts with empty history, so the
+                    # per-booking findings have to be included in the prompt.
+                    findings_text = "\n".join(findings)
+                    summary_prompt = f"""
+**Comprehensive Patient Summary**
+Analyze all bookings ({bookings_processed} total) to identify:
+1. Patterns across the entire treatment history
+2. Chronic issues that may have been missed
+3. Medication changes over time
+4. Doctor consistency across visits
+5. Long-term recommendations
+**Per-Booking Findings:**
+{findings_text}
+**Required Format:**
+### Chronic Health Patterns
+[Recurring issues over time]
+### Treatment Evolution
+[How treatment has changed]
+### Long-term Concerns
+[Issues needing ongoing attention]
+### Comprehensive Recommendations
+[Overall care plan]
+"""
+                    summary = analyze_with_agent(agent, summary_prompt)
+                    full_report.append(f"## Overall Patient Summary\n{summary}\n")
+                # Save report
+                report_path = os.path.join(report_dir, f"patient_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
+                with open(report_path, 'w', encoding='utf-8') as f:
+                    f.write("\n".join(full_report))
+                yield "\n".join(full_report), report_path
+            except Exception as e:
+                raise gr.Error(f"Analysis failed: {str(e)}") from e
+        analysis_btn.click(
+            analyze_patient,
+            inputs=file_upload,
             outputs=[output_display, report_download],
             api_name="analyze"
         )
 if __name__ == "__main__":
     try:
+        # Build the agent once and hand it to the UI, whose callback uses it.
+        agent = init_agent()
+        demo = create_ui(agent)
+        # queue() is called before launch(); max_size is set to 20.
+        demo.queue(
+            api_open=False,
+            max_size=20
+        ).launch(
             server_name="0.0.0.0",
             server_port=7860,
             show_error=True,
+            # report_dir is where the generated report files are written.
+            allowed_paths=[report_dir],
+            share=False
         )
     except Exception as e:
+        # Any start-up failure is printed and the process exits with status 1.
+        print(f"Failed to launch application: {str(e)}")
         sys.exit(1)