Spaces:

RocketFarmStudios
/

CPS-Test-Mobile

Paused

App Files Community

Ali2206 committed on Apr 23

Commit

04c881d

verified ·

1 Parent(s): 4bfbcac

Update app.py

Browse files

Files changed (1) hide show

app.py +98 -180

app.py CHANGED Viewed

@@ -7,7 +7,6 @@ import shutil
 import re
 from datetime import datetime
 import time
-from transformers import AutoTokenizer
 import asyncio
 import logging
 from concurrent.futures import ThreadPoolExecutor, as_completed
@@ -37,14 +36,7 @@ MAX_MODEL_TOKENS = 131072  # TxAgent's max token limit
 MAX_CHUNK_TOKENS = 32768   # Larger chunks to reduce number of chunks
 MAX_NEW_TOKENS = 512       # Optimized for fast generation
 PROMPT_OVERHEAD = 500      # Estimated tokens for prompt template
-MAX_CONCURRENT = 4         # Reduced concurrency to avoid vLLM socket issues
-# Initialize tokenizer for precise token counting
-try:
-    tokenizer = AutoTokenizer.from_pretrained("mims-harvard/TxAgent-T1-Llama-3.1-8B")
-except Exception as e:
-    print(f"Warning: Could not load tokenizer, falling back to heuristic: {str(e)}")
-    tokenizer = None
 # Setup logging
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
@@ -61,13 +53,9 @@ def clean_response(text: str) -> str:
     return text.strip()
 def estimate_tokens(text: str) -> int:
-    """Estimate tokens using tokenizer if available, else fall back to heuristic."""
-    if tokenizer:
-        return len(tokenizer.encode(text, add_special_tokens=False))
-    return len(text) // 3.5 + 1
 def extract_text_from_excel(file_path: str) -> str:
-    """Extract text from all sheets in an Excel file."""
     all_text = []
     try:
         xls = pd.ExcelFile(file_path)
@@ -82,12 +70,12 @@ def extract_text_from_excel(file_path: str) -> str:
         raise ValueError(f"Failed to process Excel file: {str(e)}")
     return "\n".join(all_text)
-def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS) -> List[str]:
-    """Split text into chunks respecting MAX_CHUNK_TOKENS and PROMPT_OVERHEAD."""
-    effective_max_tokens = max_tokens - PROMPT_OVERHEAD
-    if effective_max_tokens <= 0:
-        raise ValueError(f"Effective max tokens ({effective_max_tokens}) must be positive.")
     lines = text.split("\n")
     chunks = []
     current_chunk = []
@@ -95,7 +83,7 @@ def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS) -> Lis
     for line in lines:
         line_tokens = estimate_tokens(line)
-        if current_tokens + line_tokens > effective_max_tokens:
             if current_chunk:
                 chunks.append("\n".join(current_chunk))
             current_chunk = [line]
@@ -106,12 +94,11 @@ def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS) -> Lis
     if current_chunk:
         chunks.append("\n".join(current_chunk))
     logger.info(f"Split text into {len(chunks)} chunks")
     return chunks
 def build_prompt_from_text(chunk: str) -> str:
-    """Build a prompt for analyzing a chunk of clinical data."""
     return f"""
 ### Unstructured Clinical Records
@@ -132,7 +119,7 @@ Please analyze the above and provide concise responses (max {MAX_NEW_TOKENS} tok
 """
 def init_agent():
-    """Initialize the TxAgent with optimized vLLM settings for A100 80GB."""
     default_tool_path = os.path.abspath("data/new_tool.json")
     target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
@@ -152,19 +139,17 @@ def init_agent():
     agent.init_model()
     return agent
-async def process_chunk(agent, chunk: str, chunk_index: int, total_chunks: int) -> Tuple[int, str, str]:
-    """Process a single chunk with enhanced error handling."""
-    logger.info(f"Processing chunk {chunk_index+1}/{total_chunks}")
-    prompt = build_prompt_from_text(chunk)
-    prompt_tokens = estimate_tokens(prompt)
-    if prompt_tokens > MAX_MODEL_TOKENS:
-        error_msg = f"❌ Chunk {chunk_index+1} prompt too long ({prompt_tokens} tokens). Skipping..."
-        logger.warning(error_msg)
-        return chunk_index, "", error_msg
-    response = ""
     try:
         for result in agent.run_gradio_chat(
             message=prompt,
             history=[],
@@ -182,143 +167,87 @@ async def process_chunk(agent, chunk: str, chunk_index: int, total_chunks: int)
                 for r in result:
                     if hasattr(r, "content"):
                         response += r.content
-        status = f"✅ Chunk {chunk_index+1} analysis complete"
-        logger.info(status)
     except Exception as e:
-        status = f"❌ Error analyzing chunk {chunk_index+1}: {str(e)}"
-        logger.error(status)
-        response = ""
-    return chunk_index, clean_response(response), status
-async def process_final_report(agent, file, chatbot_state: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], Union[str, None]]:
-    """Process the Excel file and generate a final report."""
-    messages = chatbot_state if chatbot_state else []
     report_path = None
-    if file is None or not hasattr(file, "name"):
-        messages.append({"role": "assistant", "content": "❌ Please upload a valid Excel file before analyzing."})
-        return messages, report_path
     try:
-        messages.append({"role": "user", "content": f"Processing Excel file: {os.path.basename(file.name)}"})
-        messages.append({"role": "assistant", "content": "⏳ Extracting and analyzing data..."})
-        # Extract text and split into chunks
         start_time = time.time()
-        extracted_text = extract_text_from_excel(file.name)
-        chunks = split_text_into_chunks(extracted_text, max_tokens=MAX_CHUNK_TOKENS)
-        logger.info(f"Extracted text and split into {len(chunks)} chunks in {time.time() - start_time:.2f} seconds")
-        chunk_responses = [None] * len(chunks)
-        batch_size = MAX_CONCURRENT
-        # Process chunks in batches
-        for batch_start in range(0, len(chunks), batch_size):
-            batch_chunks = chunks[batch_start:batch_start + batch_size]
-            batch_indices = list(range(batch_start, min(batch_start + batch_size, len(chunks))))
-            logger.info(f"Processing batch {batch_start//batch_size + 1}/{(len(chunks) + batch_size - 1)//batch_size}")
-            with ThreadPoolExecutor(max_workers=MAX_CONCURRENT) as executor:
-                futures = [
-                    executor.submit(lambda c, i: asyncio.run(process_chunk(agent, c, i, len(chunks))), chunk, i)
-                    for i, chunk in zip(batch_indices, batch_chunks)
-                ]
-                for future in as_completed(futures):
-                    chunk_index, response, status = future.result()
-                    chunk_responses[chunk_index] = response
-                    messages.append({"role": "assistant", "content": status})
-        # Filter out empty responses
-        chunk_responses = [r for r in chunk_responses if r]
-        if not chunk_responses:
-            messages.append({"role": "assistant", "content": "❌ No valid chunk responses to summarize."})
-            return messages, report_path
-        # Summarize chunk responses incrementally
-        summary = ""
-        current_summary_tokens = 0
-        for i, response in enumerate(chunk_responses):
-            response_tokens = estimate_tokens(response)
-            if current_summary_tokens + response_tokens > MAX_MODEL_TOKENS - PROMPT_OVERHEAD - MAX_NEW_TOKENS:
-                summary_prompt = f"Summarize the following analysis:\n\n{summary}\n\nProvide a concise summary."
-                summary_response = ""
-                try:
-                    for result in agent.run_gradio_chat(
-                        message=summary_prompt,
-                        history=[],
-                        temperature=0.2,
-                        max_new_tokens=MAX_NEW_TOKENS,
-                        max_token=MAX_MODEL_TOKENS,
-                        call_agent=False,
-                        conversation=[],
-                    ):
-                        if isinstance(result, str):
-                            summary_response += result
-                        elif hasattr(result, "content"):
-                            summary_response += result.content
-                        elif isinstance(result, list):
-                            for r in result:
-                                if hasattr(r, "content"):
-                                    summary_response += r.content
-                    summary = clean_response(summary_response)
-                    current_summary_tokens = estimate_tokens(summary)
-                except Exception as e:
-                    messages.append({"role": "assistant", "content": f"❌ Error summarizing intermediate results: {str(e)}"})
-                    return messages, report_path
-            summary += f"\n\n### Chunk {i+1} Analysis\n{response}"
-            current_summary_tokens += response_tokens
-        # Final summarization
-        final_prompt = f"Summarize the key findings from the following analyses:\n\n{summary}"
         messages.append({"role": "assistant", "content": "📊 Generating final report..."})
-        final_report_text = ""
-        try:
-            for result in agent.run_gradio_chat(
-                message=final_prompt,
-                history=[],
-                temperature=0.2,
-                max_new_tokens=MAX_NEW_TOKENS * 2,
-                max_token=MAX_MODEL_TOKENS,
-                call_agent=False,
-                conversation=[],
-            ):
-                if isinstance(result, str):
-                    final_report_text += result
-                elif hasattr(result, "content"):
-                    final_report_text += result.content
-                elif isinstance(result, list):
-                    for r in result:
-                        if hasattr(r, "content"):
-                            final_report_text += r.content
-        except Exception as e:
-            messages.append({"role": "assistant", "content": f"❌ Error generating final report: {str(e)}"})
-            return messages, report_path
-        final_report = f"# Final Clinical Report\n\n{clean_response(final_report_text)}"
-        messages[-1]["content"] = f"📊 Final Report:\n\n{clean_response(final_report_text)}"
-        # Save the report
         timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
         report_path = os.path.join(report_dir, f"report_{timestamp}.md")
         with open(report_path, 'w') as f:
             f.write(final_report)
-        messages.append({"role": "assistant", "content": f"✅ Report generated and saved: report_{timestamp}.md"})
-        logger.info(f"Total processing time: {time.time() - start_time:.2f} seconds")
-        return messages, report_path
     except Exception as e:
-        messages.append({"role": "assistant", "content": f"❌ Error processing file: {str(e)}"})
         logger.error(f"Processing failed: {str(e)}")
-        return messages, report_path
-def create_ui(agent):
-    """Create the Gradio interface."""
     with gr.Blocks(title="Clinical Analysis", css=".gradio-container {max-width: 900px}") as demo:
         gr.Markdown("## 🏥 Clinical Data Analysis (TxAgent)")
@@ -342,43 +271,32 @@ def create_ui(agent):
                 )
                 report_output = gr.File(
                     label="Download Report",
-                    visible=False,
-                    interactive=False
                 )
-        # State to maintain chatbot messages
-        chatbot_state = gr.State(value=[])
-        async def update_ui(file, current_state):
-            if file is None or not hasattr(file, "name"):
-                messages = current_state if current_state else []
-                messages.append({"role": "assistant", "content": "❌ Please upload a valid Excel file before analyzing."})
-                return messages, None
-            messages, report_path = await process_final_report(agent, file, current_state)
-            report_update = gr.update(visible=report_path is not None, value=report_path)
-            return messages, report_update
         analyze_btn.click(
-            fn=update_ui,
-            inputs=[file_input, chatbot_state],
             outputs=[chatbot, report_output],
-            api_name="analyze"
         )
     return demo
 if __name__ == "__main__":
     try:
         agent = init_agent()
         demo = create_ui(agent)
         demo.launch(
             server_name="0.0.0.0",
             server_port=7860,
             show_error=True,
             allowed_paths=[report_dir],
             share=False,
-            inline=False,
-            max_threads=40
         )
     except Exception as e:
         logger.error(f"Application failed: {str(e)}")

 import re
 from datetime import datetime
 import time
 import asyncio
 import logging
 from concurrent.futures import ThreadPoolExecutor, as_completed
 MAX_CHUNK_TOKENS = 32768   # Larger chunks to reduce number of chunks
 MAX_NEW_TOKENS = 512       # Optimized for fast generation
 PROMPT_OVERHEAD = 500      # Estimated tokens for prompt template
+MAX_CONCURRENT = 4         # Reduced concurrency to avoid vLLM issues
 # Setup logging
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
     return text.strip()
 def estimate_tokens(text: str) -> int:
+    return len(text) // 3.5 + 1  # Conservative estimate
 def extract_text_from_excel(file_path: str) -> str:
     all_text = []
     try:
         xls = pd.ExcelFile(file_path)
         raise ValueError(f"Failed to process Excel file: {str(e)}")
     return "\n".join(all_text)
+def split_text_into_chunks(text: str) -> List[str]:
+    """Split text into chunks respecting MAX_CHUNK_TOKENS and PROMPT_OVERHEAD"""
+    effective_max = MAX_CHUNK_TOKENS - PROMPT_OVERHEAD
+    if effective_max <= 0:
+        raise ValueError("Effective max tokens must be positive")
     lines = text.split("\n")
     chunks = []
     current_chunk = []
     for line in lines:
         line_tokens = estimate_tokens(line)
+        if current_tokens + line_tokens > effective_max:
             if current_chunk:
                 chunks.append("\n".join(current_chunk))
             current_chunk = [line]
     if current_chunk:
         chunks.append("\n".join(current_chunk))
     logger.info(f"Split text into {len(chunks)} chunks")
     return chunks
 def build_prompt_from_text(chunk: str) -> str:
     return f"""
 ### Unstructured Clinical Records
 """
 def init_agent():
+    """Initialize TxAgent with conservative settings to avoid vLLM issues"""
     default_tool_path = os.path.abspath("data/new_tool.json")
     target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
     agent.init_model()
     return agent
+def process_chunk_sync(agent, chunk: str, chunk_idx: int) -> Tuple[int, str]:
+    """Synchronous wrapper for chunk processing"""
     try:
+        prompt = build_prompt_from_text(chunk)
+        prompt_tokens = estimate_tokens(prompt)
+        if prompt_tokens > MAX_MODEL_TOKENS:
+            logger.warning(f"Chunk {chunk_idx} prompt too long ({prompt_tokens} tokens)")
+            return chunk_idx, ""
+        response = ""
         for result in agent.run_gradio_chat(
             message=prompt,
             history=[],
                 for r in result:
                     if hasattr(r, "content"):
                         response += r.content
+        return chunk_idx, clean_response(response)
     except Exception as e:
+        logger.error(f"Error processing chunk {chunk_idx}: {str(e)}")
+        return chunk_idx, ""
+async def process_file(agent: TxAgent, file_path: str) -> Generator[Tuple[List[Dict[str, str]], Union[str, None]], None, None]:
+    """Process the file with improved error handling and vLLM stability"""
+    messages = []
     report_path = None
     try:
+        # Initial messages
+        messages.append({"role": "user", "content": f"Processing file: {os.path.basename(file_path)}"})
+        messages.append({"role": "assistant", "content": "⏳ Extracting data from Excel..."})
+        yield messages, None
+        # Extract and chunk text
         start_time = time.time()
+        text = extract_text_from_excel(file_path)
+        chunks = split_text_into_chunks(text)
+        messages.append({"role": "assistant", "content": f"✅ Extracted {len(chunks)} chunks in {time.time()-start_time:.1f}s"})
+        yield messages, None
+        # Process chunks sequentially to avoid vLLM socket issues
+        chunk_responses = []
+        for idx, chunk in enumerate(chunks):
+            messages.append({"role": "assistant", "content": f"🔍 Processing chunk {idx+1}/{len(chunks)}..."})
+            yield messages, None
+            _, response = process_chunk_sync(agent, chunk, idx)
+            chunk_responses.append(response)
+            messages.append({"role": "assistant", "content": f"✅ Chunk {idx+1} processed"})
+            yield messages, None
+        # Combine and summarize
+        combined = "\n\n".join([r for r in chunk_responses if r])
         messages.append({"role": "assistant", "content": "📊 Generating final report..."})
+        yield messages, None
+        final_response = ""
+        for result in agent.run_gradio_chat(
+            message=f"Summarize these clinical findings:\n\n{combined}",
+            history=[],
+            temperature=0.2,
+            max_new_tokens=MAX_NEW_TOKENS*2,
+            max_token=MAX_MODEL_TOKENS,
+            call_agent=False,
+            conversation=[],
+        ):
+            if isinstance(result, str):
+                final_response += result
+            elif hasattr(result, "content"):
+                final_response += result.content
+            elif isinstance(result, list):
+                for r in result:
+                    if hasattr(r, "content"):
+                        final_response += r.content
+            messages[-1]["content"] = f"📊 Generating final report...\n\n{clean_response(final_response)}"
+            yield messages, None
+        # Save report
+        final_report = f"# Final Clinical Report\n\n{clean_response(final_response)}"
         timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
         report_path = os.path.join(report_dir, f"report_{timestamp}.md")
         with open(report_path, 'w') as f:
             f.write(final_report)
+        messages.append({"role": "assistant", "content": f"✅ Report saved: report_{timestamp}.md"})
+        yield messages, report_path
     except Exception as e:
         logger.error(f"Processing failed: {str(e)}")
+        messages.append({"role": "assistant", "content": f"❌ Error: {str(e)}"})
+        yield messages, None
+def create_ui(agent: TxAgent):
+    """Create the Gradio interface with simplified interaction"""
     with gr.Blocks(title="Clinical Analysis", css=".gradio-container {max-width: 900px}") as demo:
         gr.Markdown("## 🏥 Clinical Data Analysis (TxAgent)")
                 )
                 report_output = gr.File(
                     label="Download Report",
+                    visible=False
                 )
         analyze_btn.click(
+            fn=lambda file: process_file(agent, file.name) if file else ([{"role": "assistant", "content": "❌ Please upload a file"}], None),
+            inputs=[file_input],
             outputs=[chatbot, report_output],
+            concurrency_limit=1  # Ensure sequential processing
         )
     return demo
 if __name__ == "__main__":
     try:
+        # Initialize with conservative settings
         agent = init_agent()
         demo = create_ui(agent)
+        # Launch with stability optimizations
         demo.launch(
             server_name="0.0.0.0",
             server_port=7860,
             show_error=True,
             allowed_paths=[report_dir],
             share=False,
+            max_threads=4  # Reduced thread count for stability
         )
     except Exception as e:
         logger.error(f"Application failed: {str(e)}")