Spaces:

RocketFarmStudios
/

CPS-Test-Mobile

Paused

App Files Files Community

Ali2206 committed on Apr 24

Commit

a57b988

verified ·

1 Parent(s): 0e6914c

Update app.py

Browse files

Files changed (1) hide show

app.py +129 -124

app.py CHANGED Viewed

@@ -1,12 +1,14 @@
 import sys
 import os
 import pandas as pd
 import gradio as gr
-from typing import List, Tuple
 import re
 from datetime import datetime
 from concurrent.futures import ThreadPoolExecutor, as_completed
-import copy
 # Setup directories
 persistent_dir = "/data/hf_cache"
@@ -14,9 +16,10 @@ os.makedirs(persistent_dir, exist_ok=True)
 model_cache_dir = os.path.join(persistent_dir, "txagent_models")
 tool_cache_dir = os.path.join(persistent_dir, "tool_cache")
 report_dir = os.path.join(persistent_dir, "reports")
-for d in [model_cache_dir, tool_cache_dir, report_dir]:
     os.makedirs(d, exist_ok=True)
 os.environ["HF_HOME"] = model_cache_dir
@@ -36,6 +39,9 @@ def clean_response(text: str) -> str:
     text = re.sub(r"[^\n#\-\*\w\s\.,:\(\)]+", "", text)
     return text.strip()
 def extract_text_from_excel(file_path: str) -> str:
     all_text = []
     xls = pd.ExcelFile(file_path)
@@ -46,116 +52,121 @@ def extract_text_from_excel(file_path: str) -> str:
         all_text.extend(sheet_text)
     return "\n".join(all_text)
-def split_text_into_chunks(text: str) -> List[str]:
-    effective_max = MAX_CHUNK_TOKENS - PROMPT_OVERHEAD
-    lines, chunks, curr_chunk = text.split("\n"), [], []
-    curr_tokens = sum(len(line.split()) for line in curr_chunk)
     for line in lines:
-        line_tokens = len(line.split())
-        if curr_tokens + line_tokens > effective_max:
             if curr_chunk:
                 chunks.append("\n".join(curr_chunk))
-            curr_chunk, curr_tokens = [line], line_tokens
         else:
             curr_chunk.append(line)
-            curr_tokens += line_tokens
-    if curr_chunk:
         chunks.append("\n".join(curr_chunk))
     return chunks
 def build_prompt_from_text(chunk: str) -> str:
-    return f"""Analyze these clinical notes and provide:
-- Diagnostic patterns
-- Medication issues
-- Missed opportunities
-- Inconsistencies
-- Follow-up recommendations
-Respond with clear bullet points:
-{chunk}"""
-class AgentWrapper:
-    def __init__(self):
-        self.agent = None
-    def init_agent(self):
-        tool_path = os.path.join(tool_cache_dir, "new_tool.json")
-        if not os.path.exists(tool_path):
-            import shutil
-            shutil.copy("data/new_tool.json", tool_path)
-        self.agent = TxAgent(
-            model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
-            rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
-            tool_files_dict={"new_tool": tool_path},
-            force_finish=True,
-            enable_checker=True,
-            step_rag_num=4,
-            seed=100
-        )
-        self.agent.init_model()
-        return self.agent
-def process_final_report(file, chatbot_state: List[Tuple[str, str]], agent: TxAgent):
-    messages = copy.deepcopy(chatbot_state) if chatbot_state else []
-    if file is None:
-        messages.append(("assistant", "❌ Please upload a valid Excel file."))
-        return messages, None
-    messages.append(("user", f"Processing Excel file: {os.path.basename(file.name)}"))
-    yield messages, None
-    try:
-        text = extract_text_from_excel(file.name)
-        chunks = split_text_into_chunks(text)
-        messages.append(("assistant", "🔍 Analyzing clinical data..."))
-        yield messages, None
-        full_report = []
-        for i, chunk in enumerate(chunks, 1):
-            prompt = build_prompt_from_text(chunk)
-            response = ""
-            for res in agent.run_gradio_chat(
-                message=prompt, history=[], temperature=0.2,
-                max_new_tokens=MAX_NEW_TOKENS, max_token=MAX_MODEL_TOKENS,
-                call_agent=False, conversation=[]
-            ):
-                if isinstance(res, str):
-                    response += res
-                elif hasattr(res, "content"):
-                    response += res.content
-            cleaned = clean_response(response)
-            full_report.append(cleaned)
-            progress_msg = f"✅ Analyzed section {i}/{len(chunks)}"
-            if len(messages) > 2 and "Analyzed section" in messages[-1][1]:
-                messages[-1] = ("assistant", progress_msg)
-            else:
-                messages.append(("assistant", progress_msg))
-            yield messages, None
-        final_report = "## 🧠 Final Clinical Report\n\n" + "\n\n".join(full_report)
-        report_path = os.path.join(report_dir, f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
-        with open(report_path, 'w') as f:
-            f.write(final_report)
-        messages.append(("assistant", f"✅ Report generated and saved: {os.path.basename(report_path)}"))
-        messages.append(("assistant", final_report))
-        yield messages, report_path
-    except Exception as e:
-        messages.append(("assistant", f"❌ Error: {str(e)}"))
-        yield messages, None
-def create_ui():
-    agent_wrapper = AgentWrapper()
-    agent = agent_wrapper.init_agent()
     with gr.Blocks(css="""
         body {
             background: #10141f;
@@ -192,39 +203,33 @@ def create_ui():
             background: linear-gradient(135deg, #37b6e9, #4b4ced);
         }
     """) as demo:
-        gr.Markdown("""# Clinical Reasoning Assistant
 Upload clinical Excel records below and click **Analyze** to generate a medical summary.
 """)
-        with gr.Row():
-            file_upload = gr.File(label="Upload Excel File", file_types=[".xlsx"])
-            analyze_btn = gr.Button("Analyze", variant="primary")
-        chatbot = gr.Chatbot(label="Chatbot", elem_classes="chatbot", type="messages")
         report_output = gr.File(label="Download Report", visible=False)
-        chatbot_state = gr.State([])
-        def wrapped_process(file, state):
-            yield from process_final_report(file, state, agent)
         analyze_btn.click(
-            fn=wrapped_process,
             inputs=[file_upload, chatbot_state],
-            outputs=[chatbot, report_output],
-            show_progress="hidden"
         )
     return demo
 if __name__ == "__main__":
     try:
-        demo = create_ui()
-        demo.launch(
-            server_name="0.0.0.0",
-            server_port=7860,
-            allowed_paths=["/data/hf_cache/reports"],
-            share=False
-        )
     except Exception as e:
         print(f"Error: {str(e)}")
-        sys.exit(1)

 import sys
 import os
 import pandas as pd
+import json
 import gradio as gr
+from typing import List, Tuple, Dict, Any, Union
+import hashlib
+import shutil
 import re
 from datetime import datetime
 from concurrent.futures import ThreadPoolExecutor, as_completed
 # Setup directories
 persistent_dir = "/data/hf_cache"
 model_cache_dir = os.path.join(persistent_dir, "txagent_models")
 tool_cache_dir = os.path.join(persistent_dir, "tool_cache")
+file_cache_dir = os.path.join(persistent_dir, "cache")
 report_dir = os.path.join(persistent_dir, "reports")
+for d in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir]:
     os.makedirs(d, exist_ok=True)
 os.environ["HF_HOME"] = model_cache_dir
     text = re.sub(r"[^\n#\-\*\w\s\.,:\(\)]+", "", text)
     return text.strip()
def estimate_tokens(text: str) -> int:
    """Return a rough token count for *text*.

    Uses a heuristic of about 3.5 characters per token, plus one so that
    even an empty string counts as a single token.

    Args:
        text: The string to measure.

    Returns:
        The estimated number of tokens, always an ``int`` >= 1.
    """
    # ``len(text) // 3.5`` is float floor-division and yields a float
    # (e.g. 2.0); convert so the result matches the declared ``int`` return.
    return int(len(text) // 3.5) + 1
 def extract_text_from_excel(file_path: str) -> str:
     all_text = []
     xls = pd.ExcelFile(file_path)
         all_text.extend(sheet_text)
     return "\n".join(all_text)
def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS, max_chunks: int = 30) -> List[str]:
    """Group the lines of *text* into chunks that fit a token budget.

    Lines are never split: each chunk is a run of whole lines joined with
    newlines. A line that does not fit into the current chunk starts a new
    one, so a single oversized line ends up as a chunk of its own.

    Args:
        text: The full text to partition, split on ``"\\n"``.
        max_tokens: Token limit per chunk before ``PROMPT_OVERHEAD`` is
            subtracted.
        max_chunks: Upper bound on the number of chunks returned; once it
            is reached the remaining lines are dropped.

    Returns:
        The list of chunk strings, at most ``max_chunks`` long.
    """
    budget = max_tokens - PROMPT_OVERHEAD
    pieces = []
    pending = []
    used = 0

    for row in text.split("\n"):
        cost = estimate_tokens(row)

        # The line still fits: keep accumulating into the open chunk.
        if used + cost <= budget:
            pending.append(row)
            used += cost
            continue

        # The line overflows the budget: close the open chunk (if any) ...
        if pending:
            pieces.append("\n".join(pending))
        # ... and stop entirely once the chunk cap has been reached.
        if len(pieces) >= max_chunks:
            break
        pending = [row]
        used = cost

    # Flush the trailing chunk unless the cap is already met (in which case
    # ``pending`` was emitted before the loop stopped).
    if pending and len(pieces) < max_chunks:
        pieces.append("\n".join(pending))
    return pieces
 def build_prompt_from_text(chunk: str) -> str:
+    return f"""
+### Unstructured Clinical Records
+Analyze the following clinical notes and provide a detailed, concise summary focusing on:
+- Diagnostic Patterns
+- Medication Issues
+- Missed Opportunities
+- Inconsistencies
+- Follow-up Recommendations
+---
+{chunk}
+---
+Respond in well-structured bullet points with medical reasoning.
+"""
def init_agent():
    """Create the TxAgent instance and load its model.

    If ``new_tool.json`` is not yet present in the tool cache directory, it
    is first copied there from ``data/new_tool.json`` (resolved against the
    current working directory).

    Returns:
        The ``TxAgent`` object after ``init_model()`` has been called on it.
    """
    cached_tool = os.path.join(tool_cache_dir, "new_tool.json")
    if not os.path.exists(cached_tool):
        bundled_tool = os.path.abspath("data/new_tool.json")
        shutil.copy(bundled_tool, cached_tool)

    agent_options = {
        "model_name": "mims-harvard/TxAgent-T1-Llama-3.1-8B",
        "rag_model_name": "mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
        "tool_files_dict": {"new_tool": cached_tool},
        "force_finish": True,
        "enable_checker": True,
        "step_rag_num": 4,
        "seed": 100,
    }
    tx_agent = TxAgent(**agent_options)
    tx_agent.init_model()
    return tx_agent
def _collect_agent_response(agent, prompt: str) -> str:
    """Send *prompt* to the agent and concatenate everything it streams back."""
    parts = []
    for res in agent.run_gradio_chat(
        message=prompt, history=[], temperature=0.2,
        max_new_tokens=MAX_NEW_TOKENS, max_token=MAX_MODEL_TOKENS,
        call_agent=False, conversation=[]
    ):
        # The stream may yield plain strings, objects carrying ``.content``,
        # or lists of such objects; gather the text from each shape.
        if isinstance(res, str):
            parts.append(res)
        elif hasattr(res, "content"):
            parts.append(res.content)
        elif isinstance(res, list):
            parts.extend(r.content for r in res if hasattr(r, "content"))
    return "".join(parts)


def process_final_report(agent, file, chatbot_state: List[Tuple[str, str]]) -> Tuple[List[Tuple[str, str]], Union[str, None], str]:
    """Analyze an uploaded Excel file and produce a final written report.

    The file's text is split into chunks, each chunk is analyzed by the
    agent, and the per-chunk analyses are then summarized by the agent into
    one report that is saved as a Markdown file in ``report_dir``.

    Args:
        agent: Object exposing ``run_gradio_chat(...)``.
        file: Uploaded file object; its ``name`` attribute is used as the
            path of the Excel file. ``None`` or an object without ``name``
            is rejected with an error message.
        chatbot_state: Existing chat history as ``(role, text)`` tuples.
            It is not modified; the returned history is a new list.

    Returns:
        A ``(messages, report_path, report_text)`` tuple. On failure
        ``report_path`` is ``None`` and ``report_text`` is ``""``.
    """
    # Work on a copy so the caller's history list is never mutated in place.
    messages = list(chatbot_state) if chatbot_state else []

    if file is None or not hasattr(file, "name"):
        return messages + [("assistant", "❌ Please upload a valid Excel file.")], None, ""

    messages.append(("user", f"📎 Uploaded file: {os.path.basename(file.name)}"))
    text = extract_text_from_excel(file.name)
    chunks = split_text_into_chunks(text)

    def analyze_chunk(chunk):
        prompt = build_prompt_from_text(chunk)
        return clean_response(_collect_agent_response(agent, prompt))

    # A single worker keeps the agent calls strictly sequential;
    # ``map`` returns the results in chunk order.
    with ThreadPoolExecutor(max_workers=1) as executor:
        chunk_responses = list(executor.map(analyze_chunk, chunks))

    valid = [r for r in chunk_responses if r and not r.startswith("❌")]
    if not valid:
        return messages + [("assistant", "❌ No valid results found in the file.")], None, ""

    summary_prompt = "Summarize this analysis in a final structured report:\n\n" + "\n\n".join(valid)
    messages.append(("assistant", "⏳ Generating the final report..."))
    cleaned = clean_response(_collect_agent_response(agent, summary_prompt))
    messages.append(("assistant", cleaned))

    report_path = os.path.join(report_dir, f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
    # Explicit UTF-8: the report header contains an emoji, which the
    # platform's default encoding may be unable to represent.
    with open(report_path, "w", encoding="utf-8") as f:
        f.write(f"# 🧠 Final Patient Report\n\n{cleaned}")
    return messages, report_path, cleaned
+def create_ui(agent):
     with gr.Blocks(css="""
         body {
             background: #10141f;
             background: linear-gradient(135deg, #37b6e9, #4b4ced);
         }
     """) as demo:
+        gr.Markdown("""# 🧠 Clinical Reasoning Assistant
 Upload clinical Excel records below and click **Analyze** to generate a medical summary.
 """)
+        chatbot = gr.Chatbot(label="Chatbot", elem_classes="chatbot", type="tuples")
+        report_output_markdown = gr.Markdown(visible=False)
+        file_upload = gr.File(label="Upload Excel File", file_types=[".xlsx"])
+        analyze_btn = gr.Button("Analyze")
         report_output = gr.File(label="Download Report", visible=False)
+        chatbot_state = gr.State(value=[])
+        def update_ui(file, current_state):
+            messages, report_path, final_text = process_final_report(agent, file, current_state)
+            return messages, gr.update(visible=report_path is not None, value=report_path), messages, gr.update(visible=True, value=final_text)
         analyze_btn.click(
+            fn=update_ui,
             inputs=[file_upload, chatbot_state],
+            outputs=[chatbot, report_output, chatbot_state, report_output_markdown]
         )
     return demo
 if __name__ == "__main__":
     try:
+        agent = init_agent()
+        demo = create_ui(agent)
+        demo.launch(server_name="0.0.0.0", server_port=7860, allowed_paths=["/data/hf_cache/reports"], share=False)
     except Exception as e:
         print(f"Error: {str(e)}")
+        sys.exit(1)