Spaces:

RocketFarmStudios
/

TxAgent-Api

Runtime error

App Files Files Community

Ali2206 committed on May 18, 2025

Commit

61c414a

verified ·

1 Parent(s): 6b4270f

Update src/txagent/txagent.py

Browse files

Files changed (1) hide show

src/txagent/txagent.py +173 -158

src/txagent/txagent.py CHANGED Viewed

@@ -1,163 +1,178 @@
 import os
-import logging
 import torch
-import pdfplumber
-import pandas as pd
-from typing import Dict, Optional, Union
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from sentence_transformers import SentenceTransformer
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-logger = logging.getLogger("TxAgent")
-class TxAgent:
-    def __init__(self,
-                 model_name: str,
-                 rag_model_name: str,
-                 tool_files_dict: Optional[Dict] = None,
-                 force_finish: bool = True,
-                 enable_checker: bool = True,
-                 step_rag_num: int = 4,
-                 seed: Optional[int] = None):
-        """Initialize TxAgent without vLLM dependencies."""
-        self.model_name = model_name
-        self.rag_model_name = rag_model_name
-        self.tool_files_dict = tool_files_dict or {}
-        self.force_finish = force_finish
-        self.enable_checker = enable_checker
-        self.step_rag_num = step_rag_num
-        self.seed = seed
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self.model = None
-        self.tokenizer = None
-        self.rag_model = None
-        logger.info(f"Initialized TxAgent with model: {model_name} on device: {self.device}")
-    def init_model(self):
-        """Initialize models using transformers only."""
-        self.load_llm_model()
-        self.load_rag_model()
-        logger.info("Model initialization complete")
-    def load_llm_model(self):
-        """Load the main LLM model using transformers."""
-        try:
-            logger.info(f"Loading LLM model: {self.model_name}")
-            self.tokenizer = AutoTokenizer.from_pretrained(
-                self.model_name,
-                cache_dir=os.getenv("HF_HOME")
-            )
-            self.model = AutoModelForCausalLM.from_pretrained(
-                self.model_name,
-                torch_dtype=torch.float16 if self.device.type == "cuda" else torch.float32,
-                device_map="auto",
-                cache_dir=os.getenv("HF_HOME")
-            )
-            logger.info(f"LLM model loaded on {self.device}")
-        except Exception as e:
-            logger.error(f"Failed to load LLM model: {str(e)}")
-            raise RuntimeError(f"Failed to load LLM model: {str(e)}")
-    def load_rag_model(self):
-        """Load the RAG model."""
-        try:
-            logger.info(f"Loading RAG model: {self.rag_model_name}")
-            self.rag_model = SentenceTransformer(
-                self.rag_model_name,
-                device=str(self.device)
-            )
-            logger.info("RAG model loaded successfully")
-        except Exception as e:
-            logger.error(f"Failed to load RAG model: {str(e)}")
-            raise RuntimeError(f"Failed to load RAG model: {str(e)}")
-    def process_document(self, file_path: str) -> Dict[str, Union[str, Dict]]:
-        """Process a document and return real analysis results."""
-        try:
-            text = self.extract_text_from_file(file_path)
-            if not text:
-                return {
-                    "status": "error",
-                    "message": "Failed to extract text",
-                    "model": self.model_name
-                }
-            analysis = self.analyze_text(text)
-            return {
-                "status": "success",
                 "analysis": analysis,
-                "model": self.model_name
-            }
-        except Exception as e:
-            logger.error(f"Document processing failed: {str(e)}")
-            return {
-                "status": "error",
-                "message": str(e),
-                "model": self.model_name
-            }
-    def extract_text_from_file(self, file_path: str) -> Optional[str]:
-        """Extract text from PDF, CSV, or Excel files."""
-        try:
-            if file_path.endswith('.pdf'):
-                with pdfplumber.open(file_path) as pdf:
-                    return "\n".join(
-                        page.extract_text()
-                        for page in pdf.pages
-                        if page.extract_text()
-                    )
-            elif file_path.endswith('.csv'):
-                df = pd.read_csv(file_path)
-                return df.to_string()
-            elif file_path.endswith(('.xlsx', '.xls')):
-                df = pd.read_excel(file_path)
-                return df.to_string()
-            logger.warning(f"Unsupported file type: {file_path}")
-            return None
-        except Exception as e:
-            logger.error(f"Text extraction failed: {str(e)}")
-            raise RuntimeError(f"Text extraction failed: {str(e)}")
-    def analyze_text(self, text: str, max_tokens: int = 1000) -> str:
-        """Analyze extracted text using the LLM."""
-        try:
-            prompt = f"""Analyze this medical document:
-1. Diagnostic patterns
-2. Medication issues
-3. Recommended follow-ups
-Document:
-{text[:8000]}  # Truncate to avoid token limits
-"""
-            inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
-            outputs = self.model.generate(
-                **inputs,
-                max_new_tokens=max_tokens,
-                pad_token_id=self.tokenizer.eos_token_id
-            )
-            return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-        except Exception as e:
-            logger.error(f"Text analysis failed: {str(e)}")
-            raise RuntimeError(f"Analysis failed: {str(e)}")
-    def cleanup(self):
-        """Clean up resources."""
-        if hasattr(self, 'model'):
-            del self.model
-        if hasattr(self, 'rag_model'):
-            del self.rag_model
-        torch.cuda.empty_cache()
-        logger.info("TxAgent resources cleaned up")
-    def __del__(self):
-        """Destructor to ensure proper cleanup."""
-        self.cleanup()

+# app.py - FastAPI application
 import os
+import sys
+import json
+import shutil
+from fastapi import FastAPI, HTTPException, UploadFile, File
+from fastapi.responses import JSONResponse
+from fastapi.middleware.cors import CORSMiddleware
+from typing import List, Dict, Optional
 import torch
+from datetime import datetime
+from pydantic import BaseModel
+# Configuration
+persistent_dir = "/data/hf_cache"
+model_cache_dir = os.path.join(persistent_dir, "txagent_models")
+tool_cache_dir = os.path.join(persistent_dir, "tool_cache")
+file_cache_dir = os.path.join(persistent_dir, "cache")
+report_dir = os.path.join(persistent_dir, "reports")
+# Create directories if they don't exist
+os.makedirs(model_cache_dir, exist_ok=True)
+os.makedirs(tool_cache_dir, exist_ok=True)
+os.makedirs(file_cache_dir, exist_ok=True)
+os.makedirs(report_dir, exist_ok=True)
+# Set environment variables
+os.environ["HF_HOME"] = model_cache_dir
+os.environ["TRANSFORMERS_CACHE"] = model_cache_dir
+# Set up Python path
+current_dir = os.path.dirname(os.path.abspath(__file__))
+src_path = os.path.abspath(os.path.join(current_dir, "src"))
+sys.path.insert(0, src_path)
+# Request models
+class ChatRequest(BaseModel):
+    message: str
+    temperature: float = 0.7
+    max_new_tokens: int = 512
+    history: Optional[List[Dict]] = None
+class MultistepRequest(BaseModel):
+    message: str
+    temperature: float = 0.7
+    max_new_tokens: int = 512
+    max_round: int = 5
+# Initialize FastAPI app
+app = FastAPI(
+    title="TxAgent API",
+    description="API for TxAgent medical document analysis",
+    version="1.0.0"
+)
+# CORS configuration
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Initialize agent at startup
+agent = None
+@app.on_event("startup")
+async def startup_event():
+    global agent
+    try:
+        agent = init_agent()
+    except Exception as e:
+        raise RuntimeError(f"Failed to initialize agent: {str(e)}")
+def init_agent():
+    """Initialize and return the TxAgent instance"""
+    tool_path = os.path.join(tool_cache_dir, "new_tool.json")
+    if not os.path.exists(tool_path):
+        shutil.copy(os.path.abspath("data/new_tool.json"), tool_path)
+    agent = TxAgent(
+        model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
+        rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
+        tool_files_dict={"new_tool": tool_path},
+        enable_finish=True,
+        enable_rag=False,
+        force_finish=True,
+        enable_checker=True,
+        step_rag_num=4,
+        seed=100
+    )
+    agent.init_model()
+    return agent
+@app.post("/chat")
+async def chat_endpoint(request: ChatRequest):
+    """Handle chat conversations"""
+    try:
+        response = agent.chat(
+            message=request.message,
+            history=request.history,
+            temperature=request.temperature,
+            max_new_tokens=request.max_new_tokens
+        )
+        return JSONResponse({
+            "status": "success",
+            "response": response,
+            "timestamp": datetime.now().isoformat()
+        })
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.post("/multistep")
+async def multistep_endpoint(request: MultistepRequest):
+    """Run multi-step reasoning"""
+    try:
+        response = agent.run_multistep_agent(
+            message=request.message,
+            temperature=request.temperature,
+            max_new_tokens=request.max_new_tokens,
+            max_round=request.max_round
+        )
+        return JSONResponse({
+            "status": "success",
+            "response": response,
+            "timestamp": datetime.now().isoformat()
+        })
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.post("/analyze")
+async def analyze_document(file: UploadFile = File(...)):
+    """Analyze a medical document"""
+    try:
+        # Save the uploaded file temporarily
+        temp_path = os.path.join(file_cache_dir, file.filename)
+        with open(temp_path, "wb") as f:
+            f.write(await file.read())
+        # Process the document
+        text = agent.extract_text_from_file(temp_path)
+        analysis = agent.analyze_text(text)
+        # Generate report
+        report_path = os.path.join(report_dir, f"{file.filename}.json")
+        with open(report_path, "w") as f:
+            json.dump({
+                "filename": file.filename,
                 "analysis": analysis,
+                "timestamp": datetime.now().isoformat()
+            }, f)
+        # Clean up
+        os.remove(temp_path)
+        return JSONResponse({
+            "status": "success",
+            "analysis": analysis,
+            "report_path": report_path,
+            "timestamp": datetime.now().isoformat()
+        })
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/status")
+async def service_status():
+    """Check service status"""
+    return {
+        "status": "running",
+        "version": "1.0.0",
+        "model": agent.model_name if agent else "not loaded",
+        "device": str(agent.device) if agent else "unknown"
+    }
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)