Spaces:

Hadiil
/

google-fastapi

Runtime error

App Files Files Community

Hadiil committed on Mar 23, 2025

Commit

29ec3ba

verified ·

1 Parent(s): 069103b

Update app.py

Browse files

Files changed (1) hide show

app.py +147 -83

app.py CHANGED Viewed

@@ -1,15 +1,18 @@
 from fastapi import FastAPI, UploadFile, File, HTTPException, Form
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import RedirectResponse, JSONResponse
-from transformers import pipeline, MarianMTModel, MarianTokenizer
 import logging
 from PIL import Image
 import io
 from docx import Document
 import fitz  # PyMuPDF
 import pandas as pd
-from tenacity import retry, stop_after_attempt, wait_exponential
-from functools import lru_cache
 from fastapi.middleware.cors import CORSMiddleware
 # Configure logging
@@ -30,119 +33,148 @@ app.add_middleware(
 # Serve static files (HTML, CSS, JS)
 app.mount("/static", StaticFiles(directory="static"), name="static")
-# Translation models
 translation_models = {
     "fr": "Helsinki-NLP/opus-mt-en-fr",
     "es": "Helsinki-NLP/opus-mt-en-es",
     "de": "Helsinki-NLP/opus-mt-en-de"
 }
-# Retry logic for model loading
-@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
-def load_model_with_retry(model_name, task, use_fast=True):
-    logger.info(f"Loading model: {model_name}")
-    return pipeline(task, model=model_name, use_fast=use_fast)
-# Lazy-loading pipelines
-@lru_cache(maxsize=1)
-def get_multimodal_pipeline():
-    return load_model_with_retry("Salesforce/blip-image-captioning-base", "image-to-text")
-@lru_cache(maxsize=1)
-def get_text_pipeline():
-    return load_model_with_retry("t5-small", "text2text-generation")
-@lru_cache(maxsize=3)
-def get_translation_pipeline(target_language):
-    model_name = translation_models.get(target_language, "Helsinki-NLP/opus-mt-en-de")
-    tokenizer = MarianTokenizer.from_pretrained(model_name)
-    model = MarianMTModel.from_pretrained(model_name)
-    return pipeline("translation_en_to_xx", model=model, tokenizer=tokenizer)
-# Root endpoint
 @app.get("/")
 def read_root():
     return RedirectResponse(url="/static/index.html")
-# Summarize text endpoint
 @app.post("/summarize")
-async def summarize_text(file: UploadFile = File(None), text: str = Form(None)):
-    try:
-        if file:
             text = await extract_text_from_file(file)
-        elif not text:
-            raise HTTPException(status_code=400, detail="No text or file provided")
-        text_pipeline = get_text_pipeline()
         summary = text_pipeline(f"summarize: {text}", max_length=100)
         return {"summary": summary[0]['generated_text']}
     except Exception as e:
-        logger.error(f"Error in summarization: {e}")
-        raise HTTPException(status_code=500, detail="Failed to summarize text. Please try again.")
-# Image captioning endpoint
 @app.post("/caption")
 async def caption_image(file: UploadFile = File(...)):
     try:
         image_data = await file.read()
         image = Image.open(io.BytesIO(image_data))
-        multimodal_pipeline = get_multimodal_pipeline()
         caption = multimodal_pipeline(image)
         return {"caption": caption[0]['generated_text']}
     except Exception as e:
-        logger.error(f"Error in image captioning: {e}")
-        raise HTTPException(status_code=500, detail="Failed to generate caption. Please try again.")
-# Translation endpoint
 @app.post("/translate")
-async def translate_document(file: UploadFile = File(None), text: str = Form(None), target_language: str = Form(...)):
     try:
-        if file:
-            text = await extract_text_from_file(file)
-        elif not text:
-            raise HTTPException(status_code=400, detail="No text or file provided")
-        translation_pipeline = get_translation_pipeline(target_language)
-        translated = translation_pipeline(text)
-        return {"translated_text": translated[0]['translation_text']}
     except Exception as e:
-        logger.error(f"Error in translation: {e}")
-        raise HTTPException(status_code=500, detail="Failed to translate text. Please try again.")
-# Question answering endpoint
 @app.post("/answer")
-async def answer_question(file: UploadFile = File(None), text: str = Form(None), question: str = Form(...)):
-    try:
-        if file:
             text = await extract_text_from_file(file)
-        elif not text:
-            raise HTTPException(status_code=400, detail="No text or file provided")
-        text_pipeline = get_text_pipeline()
         answer = text_pipeline(f"question: {question} context: {text}")
         return {"answer": answer[0]['generated_text']}
     except Exception as e:
-        logger.error(f"Error in question answering: {e}")
-        raise HTTPException(status_code=500, detail="Failed to answer the question. Please try again.")
-# Visual question answering endpoint
 @app.post("/vqa")
 async def visual_question_answering(file: UploadFile = File(...), question: str = Form(...)):
     try:
         image_data = await file.read()
         image = Image.open(io.BytesIO(image_data))
-        multimodal_pipeline = get_multimodal_pipeline()
         answer = multimodal_pipeline(image, question=question)
         return {"answer": answer[0]['generated_text']}
     except Exception as e:
-        logger.error(f"Error in visual question answering: {e}")
-        raise HTTPException(status_code=500, detail="Failed to answer the question. Please try again.")
-# Data visualization endpoint
 @app.post("/visualize")
-async def visualize_data(file: UploadFile = File(...), request: str = Form(...)):
     try:
         df = pd.read_excel(io.BytesIO(await file.read()))
         if "bar" in request.lower():
             code = f"""
 import matplotlib.pyplot as plt
@@ -167,33 +199,65 @@ import seaborn as sns
 sns.pairplot(df)
 plt.show()
 """
-        return {"code": code}
     except Exception as e:
-        logger.error(f"Error in data visualization: {e}")
-        raise HTTPException(status_code=500, detail="Failed to generate visualization code. Please try again.")
-# Helper function to extract text from files
 async def extract_text_from_file(file: UploadFile):
     try:
         file_content = await file.read()
         if file.filename.endswith(".pdf"):
-            doc = fitz.open(stream=file_content, filetype="pdf")
-            text = ""
-            for page in doc:
-                text += page.get_text()
-            return text
         elif file.filename.endswith(".docx"):
-            doc = Document(io.BytesIO(file_content))
-            return "\n".join([para.text for para in doc.paragraphs])
         elif file.filename.endswith(".txt"):
-            return file_content.decode("utf-8")
-        else:
-            raise HTTPException(status_code=400, detail="Unsupported file format")
     except Exception as e:
         logger.error(f"Error extracting text from file: {e}")
-        raise HTTPException(status_code=500, detail="Failed to extract text from file. Please try again.")
-# Run the application
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)

+import os
 from fastapi import FastAPI, UploadFile, File, HTTPException, Form
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import RedirectResponse, JSONResponse
+from transformers import pipeline
 import logging
 from PIL import Image
 import io
 from docx import Document
 import fitz  # PyMuPDF
 import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+import uuid
+from transformers import MarianMTModel, MarianTokenizer
 from fastapi.middleware.cors import CORSMiddleware
 # Configure logging
 # Serve static files (HTML, CSS, JS)
 app.mount("/static", StaticFiles(directory="static"), name="static")
+# Load models
+# Both pipelines are created once at module import and reused by the request
+# handlers below: multimodal_pipeline by /caption and /vqa, text_pipeline by
+# /summarize and /answer.
+multimodal_pipeline = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base", use_fast=True)
+text_pipeline = pipeline("text2text-generation", model="t5-small", use_fast=True)
+# Target-language code -> checkpoint name loaded by /translate via
+# MarianTokenizer / MarianMTModel. Codes missing from this table fall back to
+# the "Helsinki-NLP/opus-mt-en-de" checkpoint in that handler.
 translation_models = {
     "fr": "Helsinki-NLP/opus-mt-en-fr",
     "es": "Helsinki-NLP/opus-mt-en-es",
     "de": "Helsinki-NLP/opus-mt-en-de"
 }
 @app.get("/")
 def read_root():
+    """Redirect requests for the root URL to ``/static/index.html``."""
     return RedirectResponse(url="/static/index.html")
 @app.post("/summarize")
+async def summarize_text(
+    file: UploadFile = File(None),
+    text: str = Form(None)
+):
+    logger.info(f"Received request: file={file}, text={text}")  # Debugging
+    if file:
+        logger.info(f"Received document for summarization: {file.filename}")
+        try:
             text = await extract_text_from_file(file)
+        except Exception as e:
+            logger.error(f"Error extracting text from file: {e}")
+            raise HTTPException(status_code=400, detail=str(e))
+    elif text:
+        logger.info("Received manual text for summarization")
+    else:
+        logger.error("No file or text provided")  # Debugging
+        raise HTTPException(status_code=400, detail="No file or text provided")
+    try:
         summary = text_pipeline(f"summarize: {text}", max_length=100)
+        logger.info(f"Generated summary: {summary[0]['generated_text']}")
         return {"summary": summary[0]['generated_text']}
     except Exception as e:
+        logger.error(f"Error during summarization: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
 @app.post("/caption")
 async def caption_image(file: UploadFile = File(...)):
+    logger.info(f"Received image for captioning: {file.filename}")
     try:
         image_data = await file.read()
         image = Image.open(io.BytesIO(image_data))
+        # Validate image format
+        if image.format not in ["JPEG", "PNG"]:
+            raise ValueError("Unsupported image format. Please upload a JPEG or PNG file.")
         caption = multimodal_pipeline(image)
+        logger.info(f"Generated caption: {caption[0]['generated_text']}")
         return {"caption": caption[0]['generated_text']}
     except Exception as e:
+        logger.error(f"Error during image captioning: {e}")
+        raise HTTPException(status_code=400, detail=str(e))
 @app.post("/translate")
+async def translate_document(
+    file: UploadFile = File(...),
+    target_language: str = Form(...)
+):
+    logger.info(f"Received document for translation: {file.filename}")
+    logger.info(f"Target language: {target_language}")
     try:
+        text = await extract_text_from_file(file)
+        if target_language in translation_models:
+            model_name = translation_models[target_language]
+        else:
+            model_name = "Helsinki-NLP/opus-mt-en-de"  # Default to German
+        tokenizer = MarianTokenizer.from_pretrained(model_name)
+        model = MarianMTModel.from_pretrained(model_name)
+        translated = model.generate(**tokenizer(text, return_tensors="pt", truncation=True))
+        translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
+        return {"translated_text": translated_text}
     except Exception as e:
+        logger.error(f"Error during document translation: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
 @app.post("/answer")
+async def answer_question(
+    file: UploadFile = File(None),
+    text: str = Form(None),
+    question: str = Form(...)
+):
+    if file:
+        logger.info(f"Received document for question answering: {file.filename}")
+        try:
             text = await extract_text_from_file(file)
+        except Exception as e:
+            logger.error(f"Error extracting text from file: {e}")
+            raise HTTPException(status_code=400, detail=str(e))
+    elif text:
+        logger.info("Received manual text for question answering")
+    else:
+        raise HTTPException(status_code=400, detail="No file or text provided")
+    try:
         answer = text_pipeline(f"question: {question} context: {text}")
+        logger.info(f"Generated answer: {answer[0]['generated_text']}")
         return {"answer": answer[0]['generated_text']}
     except Exception as e:
+        logger.error(f"Error during question answering: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
 @app.post("/vqa")
 async def visual_question_answering(file: UploadFile = File(...), question: str = Form(...)):
+    logger.info(f"Received image for visual question answering: {file.filename}")
+    logger.info(f"Received question: {question}")
     try:
         image_data = await file.read()
         image = Image.open(io.BytesIO(image_data))
+        # Validate image format
+        if image.format not in ["JPEG", "PNG"]:
+            raise ValueError("Unsupported image format. Please upload a JPEG or PNG file.")
         answer = multimodal_pipeline(image, question=question)
+        logger.info(f"Generated answer: {answer[0]['generated_text']}")
         return {"answer": answer[0]['generated_text']}
     except Exception as e:
+        logger.error(f"Error during visual question answering: {e}")
+        raise HTTPException(status_code=400, detail=str(e))
 @app.post("/visualize")
+async def visualize_data(
+    file: UploadFile = File(...),
+    request: str = Form(...)
+):
+    logger.info(f"Received Excel file for visualization: {file.filename}")
+    logger.info(f"Received visualization request: {request}")
     try:
         df = pd.read_excel(io.BytesIO(await file.read()))
         if "bar" in request.lower():
             code = f"""
 import matplotlib.pyplot as plt
 sns.pairplot(df)
 plt.show()
 """
+        code_filename = f"visualization_{uuid.uuid4()}.py"
+        with open(code_filename, "w") as f:
+            f.write(code)
+        return {"code": code, "filename": code_filename}
     except Exception as e:
+        logger.error(f"Error during visualization code generation: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
 async def extract_text_from_file(file: UploadFile):
     try:
         file_content = await file.read()
+        if not file_content:
+            logger.error("Uploaded file is empty.")
+            raise ValueError("Uploaded file is empty.")
+        # Check file size (e.g., limit to 10MB)
+        if len(file_content) > 10 * 1024 * 1024:  # 10MB
+            logger.error("File size exceeds the limit (10MB).")
+            raise ValueError("File size exceeds the limit (10MB).")
+        # Check file type
+        if not file.filename.lower().endswith((".pdf", ".docx", ".txt")):
+            logger.error(f"Unsupported files format: {file.filename}")
+            raise ValueError("Unsupported file format. Please upload a PDF, DOCX, or TXT file.")
         if file.filename.endswith(".pdf"):
+            try:
+                # Log the first few bytes of the file for debugging
+                logger.info(f"First 100 bytes of the file: {file_content[:100]}")
+                # Attempt to open the PDF
+                doc = fitz.open(stream=file_content, filetype="pdf")
+                text = ""
+                for page in doc:
+                    text += page.get_text()
+                return text
+            except Exception as e:
+                logger.error(f"Error reading PDF file: {e}")
+                raise ValueError("Failed to read PDF file. It might be corrupted or not a valid PDF.")
         elif file.filename.endswith(".docx"):
+            try:
+                doc = Document(io.BytesIO(file_content))
+                text = "\n".join([para.text for para in doc.paragraphs])
+                return text
+            except Exception as e:
+                logger.error(f"Error reading DOCX file: {e}")
+                raise ValueError("Failed to read DOCX file. It might be corrupted or not a valid DOCX.")
         elif file.filename.endswith(".txt"):
+            try:
+                return file_content.decode("utf-8")
+            except Exception as e:
+                logger.error(f"Error reading TXT file: {e}")
+                raise ValueError("Failed to read TXT file. It might be corrupted or not a valid TXT.")
     except Exception as e:
         logger.error(f"Error extracting text from file: {e}")
+        raise HTTPException(status_code=400, detail=str(e))
 if __name__ == "__main__":
     import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860