Spaces:

RocketFarmStudios
/

CPS-Test-Mobile

Paused

App Files Community

Ali2206 committed on Apr 25

Commit

59f3278

verified ·

1 Parent(s): a1a096d

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -48

app.py CHANGED Viewed

@@ -1,11 +1,12 @@
 import sys, os, json, shutil, re, time, gc, hashlib
 import pandas as pd
 from datetime import datetime
-from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import List, Tuple, Dict, Union
 import gradio as gr
 # Constants
 MAX_MODEL_TOKENS = 131072
 MAX_NEW_TOKENS = 4096
@@ -42,14 +43,11 @@ def clean_response(text: str) -> str:
 def extract_text_from_excel(path: str) -> str:
     all_text = []
-    try:
-        xls = pd.ExcelFile(path)
-        for sheet in xls.sheet_names:
-            df = xls.parse(sheet).astype(str).fillna("")
-            rows = df.apply(lambda row: " | ".join(row), axis=1)
-            all_text += [f"[{sheet}] {line}" for line in rows]
-    except Exception as e:
-        raise ValueError(f"Error reading Excel file: {str(e)}")
     return "\n".join(all_text)
 def split_text(text: str, max_tokens=MAX_CHUNK_TOKENS) -> List[str]:
@@ -69,27 +67,12 @@ def split_text(text: str, max_tokens=MAX_CHUNK_TOKENS) -> List[str]:
     return chunks
 def build_prompt(chunk: str) -> str:
-    return f"""### Unstructured Clinical Records
-Analyze the clinical notes below and summarize with:
-- Diagnostic Patterns
-- Medication Issues
-- Missed Opportunities
-- Inconsistencies
-- Follow-up Recommendations
----
-{chunk}
----
-Respond concisely in bullet points with clinical reasoning."""
 def init_agent() -> TxAgent:
     tool_path = os.path.join(tool_cache_dir, "new_tool.json")
     if not os.path.exists(tool_path):
         shutil.copy(os.path.abspath("data/new_tool.json"), tool_path)
     agent = TxAgent(
         model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
         rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
@@ -102,15 +85,15 @@ def init_agent() -> TxAgent:
     agent.init_model()
     return agent
-def analyze_chunks_parallel(agent, chunks: List[str]) -> List[str]:
-    results = [None] * len(chunks)
-    def analyze(i, chunk):
         prompt = build_prompt(chunk)
         try:
-            if estimate_tokens(prompt) > MAX_MODEL_TOKENS:
-                return i, f"❌ Chunk {i+1} too long. Skipped."
-            response = ""
             for r in agent.run_gradio_chat(
                 message=prompt,
                 history=[],
@@ -129,24 +112,13 @@ def analyze_chunks_parallel(agent, chunks: List[str]) -> List[str]:
                 elif hasattr(r, "content"):
                     response += r.content
             gc.collect()
-            return i, clean_response(response)
         except Exception as e:
-            return i, f"❌ Error in chunk {i+1}: {str(e)}"
-    with ThreadPoolExecutor(max_workers=4) as executor:
-        futures = [executor.submit(analyze, i, chunk) for i, chunk in enumerate(chunks)]
-        for future in as_completed(futures):
-            i, res = future.result()
-            results[i] = res
     return results
 def generate_final_summary(agent, combined: str) -> str:
-    final_prompt = f"""Provide a structured medical report based on the following summaries:
-{combined}
-Respond in detailed medical bullet points."""
     full_report = ""
     for r in agent.run_gradio_chat(
         message=final_prompt,
@@ -178,7 +150,7 @@ def process_report(agent, file, messages: List[Dict[str, str]]) -> Tuple[List[Di
         chunks = split_text(extracted)
         messages.append({"role": "assistant", "content": f"🔍 Split into {len(chunks)} chunks. Analyzing..."})
-        chunk_results = analyze_chunks_parallel(agent, chunks)
         valid = [res for res in chunk_results if not res.startswith("❌")]
         if not valid:
@@ -226,4 +198,4 @@ if __name__ == "__main__":
         ui.launch(server_name="0.0.0.0", server_port=7860, allowed_paths=["/data/hf_cache/reports"], share=False)
     except Exception as err:
         print(f"Startup failed: {err}")
-        sys.exit(1)

 import sys, os, json, shutil, re, time, gc, hashlib
 import pandas as pd
 from datetime import datetime
 from typing import List, Tuple, Dict, Union
 import gradio as gr
+from concurrent.futures import ThreadPoolExecutor
 # Constants
 MAX_MODEL_TOKENS = 131072
 MAX_NEW_TOKENS = 4096
 def extract_text_from_excel(path: str) -> str:
     all_text = []
+    xls = pd.ExcelFile(path)
+    for sheet in xls.sheet_names:
+        df = xls.parse(sheet).astype(str).fillna("")
+        rows = df.apply(lambda row: " | ".join(row), axis=1)
+        all_text += [f"[{sheet}] {line}" for line in rows]
     return "\n".join(all_text)
 def split_text(text: str, max_tokens=MAX_CHUNK_TOKENS) -> List[str]:
     return chunks
 def build_prompt(chunk: str) -> str:
+    return f"""### Unstructured Clinical Records\n\nAnalyze the clinical notes below and summarize with:\n- Diagnostic Patterns\n- Medication Issues\n- Missed Opportunities\n- Inconsistencies\n- Follow-up Recommendations\n\n---\n\n{chunk}\n\n---\nRespond concisely in bullet points with clinical reasoning."""
 def init_agent() -> TxAgent:
     tool_path = os.path.join(tool_cache_dir, "new_tool.json")
     if not os.path.exists(tool_path):
         shutil.copy(os.path.abspath("data/new_tool.json"), tool_path)
     agent = TxAgent(
         model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
         rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
     agent.init_model()
     return agent
+def analyze_serial(agent, chunks: List[str]) -> List[str]:
+    results = []
+    for i, chunk in enumerate(chunks):
         prompt = build_prompt(chunk)
+        if estimate_tokens(prompt) > MAX_MODEL_TOKENS:
+            results.append(f"❌ Chunk {i+1} too long. Skipped.")
+            continue
+        response = ""
         try:
             for r in agent.run_gradio_chat(
                 message=prompt,
                 history=[],
                 elif hasattr(r, "content"):
                     response += r.content
             gc.collect()
+            results.append(clean_response(response))
         except Exception as e:
+            results.append(f"❌ Error in chunk {i+1}: {str(e)}")
     return results
 def generate_final_summary(agent, combined: str) -> str:
+    final_prompt = f"""Provide a structured medical report based on the following summaries:\n\n{combined}\n\nRespond in detailed medical bullet points."""
     full_report = ""
     for r in agent.run_gradio_chat(
         message=final_prompt,
         chunks = split_text(extracted)
         messages.append({"role": "assistant", "content": f"🔍 Split into {len(chunks)} chunks. Analyzing..."})
+        chunk_results = analyze_serial(agent, chunks)
         valid = [res for res in chunk_results if not res.startswith("❌")]
         if not valid:
         ui.launch(server_name="0.0.0.0", server_port=7860, allowed_paths=["/data/hf_cache/reports"], share=False)
     except Exception as err:
         print(f"Startup failed: {err}")
+        sys.exit(1)