Spaces:

RocketFarmStudios
/

CPS-Test-Mobile

Paused

App Files Community

Ali2206 committed on Apr 27

Commit

f10bfab

verified ·

1 Parent(s): b4dbed8

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -4

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ import time
 from datetime import datetime
 from typing import List, Tuple, Dict, Union
 import pandas as pd
 import gradio as gr
 import torch
@@ -63,6 +64,42 @@ def extract_text_from_excel(path: str) -> str:
                     all_text.append(f"[{sheet_name}] {text_line}")
     return "\n".join(all_text)
 def split_text(text: str, max_tokens=MAX_CHUNK_TOKENS) -> List[str]:
     effective_limit = max_tokens - PROMPT_OVERHEAD
     chunks, current, current_tokens = [], [], 0
@@ -129,7 +166,7 @@ def analyze_batches(agent, batches: List[List[str]]) -> List[str]:
             time.sleep(SAFE_SLEEP)
         except Exception as e:
             results.append(f"❌ Batch failed: {str(e)}")
-            time.sleep(SAFE_SLEEP * 2)  # longer sleep on error
     torch.cuda.empty_cache()
     gc.collect()
     return results
@@ -158,12 +195,16 @@ def generate_final_summary(agent, combined: str) -> str:
 def process_report(agent, file, messages: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], Union[str, None]]:
     if not file or not hasattr(file, "name"):
-        messages.append({"role": "assistant", "content": "❌ Please upload a valid Excel file."})
         return messages, None
     messages.append({"role": "user", "content": f"📂 Processing file: {os.path.basename(file.name)}"})
     try:
-        extracted = extract_text_from_excel(file.name)
         chunks = split_text(extracted)
         batches = batch_chunks(chunks, batch_size=BATCH_SIZE)
         messages.append({"role": "assistant", "content": f"🔍 Split into {len(batches)} batches. Analyzing..."})
@@ -211,7 +252,7 @@ def create_ui(agent):
         """)
         with gr.Column():
             chatbot = gr.Chatbot(label="CPS Assistant", height=700, type="messages")
-            upload = gr.File(label="Upload Medical File", file_types=[".xlsx"])
             analyze = gr.Button("🧠 Analyze")
             download = gr.File(label="Download Report", visible=False, interactive=False)

 from datetime import datetime
 from typing import List, Tuple, Dict, Union
 import pandas as pd
+import pdfplumber
 import gradio as gr
 import torch
                     all_text.append(f"[{sheet_name}] {text_line}")
     return "\n".join(all_text)
def extract_text_from_csv(path: str) -> str:
    """Flatten a CSV file into pipe-separated text lines.

    Each data row is rendered as ``cell | cell | ...`` using only its
    non-blank cells. Rows with fewer than two non-blank cells, or whose
    rendered line is 15 characters or shorter, are skipped.

    Args:
        path: Location of the CSV file to read.

    Returns:
        The kept lines joined with newlines, or an empty string when the
        file cannot be read or parsed.
    """
    try:
        # Read every column as text and blank out missing cells *before* any
        # string conversion. Converting first would turn NaN into the literal
        # "nan", which would then pass the emptiness filter below.
        df = pd.read_csv(path, dtype=str).fillna("")
    except Exception:
        # Best-effort extraction: any read/parse failure yields no text.
        return ""

    lines = []
    for row in df.itertuples(index=False, name=None):
        non_empty = [str(cell).strip() for cell in row if str(cell).strip()]
        if len(non_empty) < 2:
            continue
        text_line = " | ".join(non_empty)
        if len(text_line) > 15:
            lines.append(text_line)
    return "\n".join(lines)
def extract_text_from_pdf(path: str) -> str:
    """Collect the text of every page of a PDF into one string.

    Pages for which ``extract_text()`` returns nothing are skipped; the
    text of each remaining page is stripped of surrounding whitespace.

    Args:
        path: Location of the PDF file to read.

    Returns:
        The page texts joined with newlines, or an empty string if opening
        the file or extracting any page raises an exception.
    """
    try:
        with pdfplumber.open(path) as pdf:
            raw_pages = (page.extract_text() for page in pdf.pages)
            page_texts = [raw.strip() for raw in raw_pages if raw]
    except Exception:
        # Best-effort: a failure at any point discards the whole document.
        return ""
    return "\n".join(page_texts)
def extract_text(file_path: str) -> str:
    """Extract text from a file using the extractor for its extension.

    Supported extensions are ``.xlsx``, ``.csv`` and ``.pdf``. The match is
    case-insensitive, so ``REPORT.XLSX`` is handled like ``report.xlsx``.

    Args:
        file_path: Path of the file; only its extension selects the
            extractor, and the path is passed through unchanged.

    Returns:
        Whatever the selected extractor returns, or an empty string when
        the extension is not one of the supported ones.
    """
    # Compare against a lower-cased copy so upper/mixed-case extensions are
    # not silently treated as unsupported.
    lowered = file_path.lower()
    if lowered.endswith(".xlsx"):
        return extract_text_from_excel(file_path)
    if lowered.endswith(".csv"):
        return extract_text_from_csv(file_path)
    if lowered.endswith(".pdf"):
        return extract_text_from_pdf(file_path)
    return ""
 def split_text(text: str, max_tokens=MAX_CHUNK_TOKENS) -> List[str]:
     effective_limit = max_tokens - PROMPT_OVERHEAD
     chunks, current, current_tokens = [], [], 0
             time.sleep(SAFE_SLEEP)
         except Exception as e:
             results.append(f"❌ Batch failed: {str(e)}")
+            time.sleep(SAFE_SLEEP * 2)
     torch.cuda.empty_cache()
     gc.collect()
     return results
 def process_report(agent, file, messages: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], Union[str, None]]:
     if not file or not hasattr(file, "name"):
+        messages.append({"role": "assistant", "content": "❌ Please upload a valid file."})
         return messages, None
     messages.append({"role": "user", "content": f"📂 Processing file: {os.path.basename(file.name)}"})
     try:
+        extracted = extract_text(file.name)
+        if not extracted:
+            messages.append({"role": "assistant", "content": "❌ Could not extract text."})
+            return messages, None
         chunks = split_text(extracted)
         batches = batch_chunks(chunks, batch_size=BATCH_SIZE)
         messages.append({"role": "assistant", "content": f"🔍 Split into {len(batches)} batches. Analyzing..."})
         """)
         with gr.Column():
             chatbot = gr.Chatbot(label="CPS Assistant", height=700, type="messages")
+            upload = gr.File(label="Upload Medical File", file_types=[".xlsx", ".csv", ".pdf"])
             analyze = gr.Button("🧠 Analyze")
             download = gr.File(label="Download Report", visible=False, interactive=False)