Spaces:

PenguinMan
/

MediDoc

Runtime error

App Files Files Community

PenguinMan committed on Jun 7, 2025

Commit

a4c7002

verified ·

1 Parent(s): cf6fecb

Upload api.py

Browse files

Files changed (1) hide show

api.py +321 -0

api.py ADDED Viewed

	@@ -0,0 +1,321 @@

+from fastapi import FastAPI, UploadFile, File, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+import sqlite3
+import os
+import pytesseract
+from PIL import Image
+from pdf2image import convert_from_path
+from groq import Groq
+import json
+import logging
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# --- Configuration ---
+DATABASE = "medidoc.db"
+UPLOAD_FOLDER = "uploads"
+os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+# --- Groq Client Initialization ---
+# Use environment variable for API key
+GROQ_API_KEY = os.getenv("GROQ_API_KEY", "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")
+client = Groq(api_key=GROQ_API_KEY)
+# --- Database Setup ---
+def init_db():
+    try:
+        conn = sqlite3.connect(DATABASE)
+        cursor = conn.cursor()
+        cursor.execute("""
+        CREATE TABLE IF NOT EXISTS documents (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            filename TEXT NOT NULL,
+            category TEXT,
+            document_date TEXT,
+            doctor_name TEXT,
+            hospital_name TEXT,
+            summary TEXT,
+            content TEXT
+        )
+        """)
+        conn.commit()
+        conn.close()
+        logger.info("Database initialized successfully")
+    except Exception as e:
+        logger.error(f"Database initialization failed: {e}")
+init_db()
+# --- FastAPI App ---
+app = FastAPI(title="MediDoc API", version="1.0.0")
+# Add CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # In production, specify exact origins
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# --- Helper Functions ---
+def extract_text_from_file(filepath: str) -> str:
+    """Extract text from PDF or image files"""
+    try:
+        if not os.path.exists(filepath):
+            logger.error(f"File not found: {filepath}")
+            return ""
+        if filepath.lower().endswith(".pdf"):
+            pages = convert_from_path(filepath)
+            text = ""
+            for page in pages:
+                text += pytesseract.image_to_string(page) + "\n"
+            return text.strip()
+        else:
+            # Handle image files
+            with Image.open(filepath) as img:
+                text = pytesseract.image_to_string(img)
+            return text.strip()
+    except Exception as e:
+        logger.error(f"Error extracting text from {filepath}: {e}")
+        return ""
+def process_with_llm(text: str) -> dict:
+    """Analyze medical text using Groq's Llama model"""
+    if not text.strip():
+        return {
+            "category": "Empty Document",
+            "document_date": "N/A",
+            "doctor_name": "N/A",
+            "hospital_name": "N/A",
+            "summary": "Document appears to be empty or text could not be extracted.",
+        }
+    system_prompt = """
+    You are an expert medical data extraction assistant. Analyze the provided text from a medical document and extract key information.
+    Respond ONLY with a valid JSON object containing exactly these keys:
+    - "category": Choose from "Prescription", "Lab Report", "Medical Bill", "Pharmacy Bill", "Discharge Summary", "Consultation Notes", "Other"
+    - "document_date": Date in YYYY-MM-DD format. If not found, use "N/A"
+    - "doctor_name": Full name of the doctor. If not found, use "N/A"
+    - "hospital_name": Name of hospital/clinic. If not found, use "N/A"
+    - "summary": A brief, clear summary in 1-2 sentences describing what this document is about
+    Return only the JSON object, no other text.
+    """
+    fallback_response = {
+        "category": "Other",
+        "document_date": "N/A",
+        "doctor_name": "N/A",
+        "hospital_name": "N/A",
+        "summary": "Medical document processed but specific information could not be extracted.",
+    }
+    try:
+        completion = client.chat.completions.create(
+            model="llama-3.1-8b-instant",
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": f"Medical document text:\n\n{text[:2000]}"}  # Limit text length
+            ],
+            temperature=0.1,
+            max_tokens=300,
+            top_p=1,
+            stream=False,
+        )
+        response_content = completion.choices[0].message.content.strip()
+        # Clean up the response
+        if response_content.startswith("```json"):
+            response_content = response_content[7:]
+        if response_content.endswith("```"):
+            response_content = response_content[:-3]
+        response_content = response_content.strip()
+        parsed_response = json.loads(response_content)
+        # Validate required keys
+        required_keys = ["category", "document_date", "doctor_name", "hospital_name", "summary"]
+        for key in required_keys:
+            if key not in parsed_response:
+                parsed_response[key] = "N/A"
+        return parsed_response
+    except json.JSONDecodeError as e:
+        logger.error(f"JSON Parsing Error: {e}\nRaw Response: {response_content}")
+        return fallback_response
+    except Exception as e:
+        logger.error(f"Error with Groq API: {e}")
+        return fallback_response
+# --- API Endpoints ---
+@app.get("/")
+async def root():
+    return {"message": "MediDoc API is running"}
+@app.post("/upload/")
+async def upload_document(file: UploadFile = File(...)):
+    """Upload and process a medical document"""
+    try:
+        # Validate file type
+        allowed_types = ['application/pdf', 'image/jpeg', 'image/jpg', 'image/png']
+        if file.content_type not in allowed_types:
+            raise HTTPException(status_code=400, detail="Only PDF and image files are allowed")
+        # Save uploaded file
+        filepath = os.path.join(UPLOAD_FOLDER, file.filename)
+        with open(filepath, "wb") as buffer:
+            content = await file.read()
+            if not content:
+                raise HTTPException(status_code=400, detail="Uploaded file is empty")
+            buffer.write(content)
+        logger.info(f"File saved: {filepath}")
+        # Extract text
+        text = extract_text_from_file(filepath)
+        if not text.strip():
+            # Clean up the file
+            os.remove(filepath)
+            raise HTTPException(status_code=400, detail="Could not extract text from the uploaded file")
+        # Process with LLM
+        processed_data = process_with_llm(text)
+        # Save to database
+        conn = sqlite3.connect(DATABASE)
+        cursor = conn.cursor()
+        cursor.execute(
+            """INSERT INTO documents
+               (filename, category, document_date, doctor_name, hospital_name, summary, content)
+               VALUES (?, ?, ?, ?, ?, ?, ?)""",
+            (
+                file.filename,
+                processed_data.get("category", "N/A"),
+                processed_data.get("document_date", "N/A"),
+                processed_data.get("doctor_name", "N/A"),
+                processed_data.get("hospital_name", "N/A"),
+                processed_data.get("summary", "N/A"),
+                text
+            ),
+        )
+        conn.commit()
+        conn.close()
+        logger.info(f"Document processed successfully: {file.filename}")
+        return {"filename": file.filename, "info": processed_data, "status": "success"}
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Unexpected error processing file: {e}")
+        raise HTTPException(status_code=500, detail="Internal server error occurred while processing the file")
+@app.get("/documents/")
+def get_documents():
+    """Retrieve all processed documents"""
+    try:
+        conn = sqlite3.connect(DATABASE)
+        conn.row_factory = sqlite3.Row
+        cursor = conn.cursor()
+        cursor.execute("""
+            SELECT id, filename, category, document_date, doctor_name, hospital_name, summary
+            FROM documents
+            ORDER BY
+                CASE WHEN document_date = 'N/A' THEN 1 ELSE 0 END,
+                document_date DESC
+        """)
+        documents = [dict(row) for row in cursor.fetchall()]
+        conn.close()
+        return {"documents": documents, "count": len(documents)}
+    except Exception as e:
+        logger.error(f"Error retrieving documents: {e}")
+        raise HTTPException(status_code=500, detail="Could not retrieve documents")
+class SearchResult(BaseModel):
+    answer: str
+    sources: list
+@app.get("/search/", response_model=SearchResult)
+def search_medical_history(query: str):
+    """Search through medical documents using natural language"""
+    if not query.strip():
+        raise HTTPException(status_code=400, detail="Search query cannot be empty")
+    try:
+        conn = sqlite3.connect(DATABASE)
+        cursor = conn.cursor()
+        cursor.execute("SELECT filename, content, summary, category FROM documents")
+        all_docs = cursor.fetchall()
+        conn.close()
+        if not all_docs:
+            return {"answer": "No documents have been uploaded yet. Please upload some medical documents first.", "sources": []}
+        # Prepare context for the AI
+        context_parts = []
+        for i, doc in enumerate(all_docs):
+            filename, content, summary, category = doc
+            context_parts.append(f"Document {i+1}: {filename}\nCategory: {category}\nSummary: {summary}\nContent: {content[:1500]}")
+        context = "\n\n---\n\n".join(context_parts)
+        system_prompt = f"""
+        You are a medical assistant helping a patient understand their medical history.
+        Answer the user's question based ONLY on the provided medical documents.
+        Guidelines:
+        - Provide a clear, helpful answer
+        - Mention specific document names when referencing information
+        - If information is not available in the documents, say so clearly
+        - Be concise but informative
+        - Use medical terminology appropriately but explain complex terms
+        Available Documents:
+        {context}
+        """
+        completion = client.chat.completions.create(
+            model="llama-3.1-8b-instant",
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": query}
+            ],
+            temperature=0.2,
+            max_tokens=800,
+        )
+        answer = completion.choices[0].message.content
+        # Find relevant sources mentioned in the answer
+        sources = []
+        for doc in all_docs:
+            filename = doc[0]
+            if filename.lower() in answer.lower():
+                sources.append({
+                    "filename": filename,
+                    "summary": doc[2],
+                    "category": doc[3]
+                })
+        return {"answer": answer, "sources": sources}
+    except Exception as e:
+        logger.error(f"Error during search: {e}")
+        raise HTTPException(status_code=500, detail="Search service is currently unavailable")
+@app.get("/health")
+def health_check():
+    """Health check endpoint"""
+    return {"status": "healthy", "database": "connected"}
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)