Update ui/ui_core.py

ui/ui_core.py CHANGED (+20 -58)
@@ -2,6 +2,7 @@ import sys
 import os
 import pandas as pd
 import pdfplumber
+import json
 import gradio as gr
 from typing import List
 
@@ -20,7 +21,6 @@ def clean_final_response(text: str) -> str:
     if len(responses) <= 1:
         return f"<div style='padding:1em;border:1px solid #ccc;border-radius:12px;color:#fff;background:#353F54;'><p>{cleaned}</p></div>"
 
-    # Support multiple [Final Analysis] sections
     panels = []
     for i, section in enumerate(responses[1:], 1):
         final = section.strip()
@@ -32,59 +32,30 @@ def clean_final_response(text: str) -> str:
         )
     return "".join(panels)
 
-def extract_all_text_from_csv_or_excel(file_path: str, progress=None, index=0, total=1) -> str:
+def convert_file_to_json(file_path: str, file_type: str) -> str:
     try:
-        if not os.path.exists(file_path):
-            return f"File not found: {file_path}"
-
-        if progress:
-            progress((index + 1) / total, desc=f"Reading spreadsheet: {os.path.basename(file_path)}")
-
-        df = None
-        if file_path.endswith(".csv"):
+        if file_type == "csv":
             df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str, skip_blank_lines=False, on_bad_lines="skip")
-        elif file_path.endswith((".xls", ".xlsx")):
+        elif file_type in ["xls", "xlsx"]:
             try:
                 df = pd.read_excel(file_path, engine="openpyxl", header=None, dtype=str)
             except:
                 df = pd.read_excel(file_path, engine="xlrd", header=None, dtype=str)
+        elif file_type == "pdf":
+            with pdfplumber.open(file_path) as pdf:
+                text = "\n".join([page.extract_text() or "" for page in pdf.pages])
+            return json.dumps({"filename": os.path.basename(file_path), "content": text.strip()})
+        else:
+            return json.dumps({"error": f"Unsupported file type: {file_type}"})
 
         if df is None or df.empty:
-            return f"No data found in: {os.path.basename(file_path)}"
+            return json.dumps({"warning": f"No data extracted from: {file_path}"})
 
-        df = df.fillna("")  # Handle missing data gracefully
-
-        lines = []
-        for _, row in df.iterrows():
-            line = " | ".join(str(cell) for cell in row if str(cell).strip())
-            if line:
-                lines.append(line)
-
-        return f"\U0001F4C4 {os.path.basename(file_path)}\n\n" + "\n".join(lines)
-
+        df = df.fillna("")
+        content = df.astype(str).values.tolist()
+        return json.dumps({"filename": os.path.basename(file_path), "rows": content})
     except Exception as e:
-        return f"[Error reading {os.path.basename(file_path)}]: {str(e)}"
-
-def extract_all_text_from_pdf(file_path: str, progress=None, index=0, total=1) -> str:
-    try:
-        if not os.path.exists(file_path):
-            return f"PDF not found: {file_path}"
-
-        extracted = []
-        with pdfplumber.open(file_path) as pdf:
-            num_pages = len(pdf.pages)
-            for i, page in enumerate(pdf.pages):
-                try:
-                    text = page.extract_text() or ""
-                    extracted.append(text.strip())
-                    if progress:
-                        progress((index + (i / num_pages)) / total, desc=f"Reading PDF: {os.path.basename(file_path)} ({i+1}/{num_pages})")
-                except Exception as e:
-                    extracted.append(f"[Error reading page {i+1}]: {str(e)}")
-        return f"\U0001F4C4 {os.path.basename(file_path)}\n\n" + "\n\n".join(extracted)
-
-    except Exception as e:
-        return f"[Error reading PDF {os.path.basename(file_path)}]: {str(e)}"
+        return json.dumps({"error": f"Error reading {os.path.basename(file_path)}: {str(e)}"})
 
 def chunk_text(text: str, max_tokens: int = 8192) -> List[str]:
     chunks = []
@@ -103,8 +74,6 @@ def chunk_text(text: str, max_tokens: int = 8192) -> List[str]:
         chunks.append(" ".join(chunk))
     return chunks
 
-# ... rest of the UI code remains unchanged
-
 def create_ui(agent: TxAgent):
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown("<h1 style='text-align: center;'>\U0001F4CB CPS: Clinical Patient Support System</h1>")
@@ -140,18 +109,11 @@ def create_ui(agent: TxAgent):
                     if not hasattr(file, 'name'):
                         continue
                     path = file.name
-                    try:
-                        if path.endswith((".csv", ".xls", ".xlsx")):
-                            extracted_text += extract_all_text_from_csv_or_excel(path) + "\n"
-                        elif path.endswith(".pdf"):
-                            extracted_text += extract_all_text_from_pdf(path) + "\n"
-                        else:
-                            extracted_text += f"(Uploaded file: {os.path.basename(path)})\n"
-                    except Exception as file_error:
-                        extracted_text += f"[Error processing {os.path.basename(path)}]: {str(file_error)}\n"
-
-            sanitized = sanitize_utf8(extracted_text.strip())
-            chunks = chunk_text(sanitized)
+                    extension = path.split(".")[-1].lower()
+                    json_text = convert_file_to_json(path, extension)
+                    extracted_text += sanitize_utf8(json_text) + "\n"
+
+            chunks = chunk_text(extracted_text.strip())
 
             full_response = ""
             for i, chunk in enumerate(chunks):
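
Note: the commit collapses the two plain-text extractors into a single convert_file_to_json helper that always returns a JSON string, so callers must parse it and branch on which key is present ("rows" for spreadsheets, "content" for PDFs, "error"/"warning" otherwise). A minimal usage sketch; the import path follows the repo layout above and the file name is a made-up example:

    import json
    from ui.ui_core import convert_file_to_json  # assumes ui/ is importable as a package

    raw = convert_file_to_json("labs.csv", "csv")  # "labs.csv" is a hypothetical example file
    result = json.loads(raw)
    if "rows" in result:
        print(result["filename"], "->", len(result["rows"]), "rows")
    elif "content" in result:
        print(result["filename"], "->", len(result["content"]), "chars of PDF text")
    else:
        print(result.get("error") or result.get("warning"))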
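
One behavioural detail worth flagging in the new upload loop: extension = path.split(".")[-1].lower() returns the whole filename when the name contains no dot, so an extensionless upload reaches the "Unsupported file type" branch with a misleading label. A sketch of a safer variant using only the standard library (an alternative, not what this commit does):

    import os

    # os.path.splitext yields an empty string for extensionless paths,
    # which is easier to test for than a full filename.
    extension = os.path.splitext(path)[1].lstrip(".").lower()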