|
|
|
|
|
import os |
|
|
import logging |
|
|
from fastapi import FastAPI, UploadFile, File, HTTPException |
|
|
from fastapi.middleware.cors import CORSMiddleware |
|
|
from fastapi.responses import FileResponse, JSONResponse |
|
|
from fastapi.staticfiles import StaticFiles |
|
|
from starlette.status import HTTP_400_BAD_REQUEST, HTTP_500_INTERNAL_SERVER_ERROR |
|
|
|
|
|
from summarizer import generate_summary |
|
|
from pdf_reader import extract_text_from_pdf |
|
|
|
|
|
|
|
|
# Module-wide logger used by the request handlers below.
logger = logging.getLogger("summarizer_app")

# Configure the root logger at INFO level. basicConfig does nothing if the
# root logger already has handlers (e.g. installed by the hosting server).
logging.basicConfig(level=logging.INFO)
|
|
|
|
|
# The ASGI application object; every route below is registered on it.
app = FastAPI(title="Scientific Article Summarizer")

# CORS: every origin, method and header is accepted.
# NOTE(review): a wildcard policy is convenient for local development;
# confirm whether it is intended for deployed environments.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)

# Files under the process working directory are served beneath /static.
# NOTE(review): directory="." makes everything in the working directory
# downloadable (presumably including this module's source) -- consider
# pointing this at a dedicated assets directory.
app.mount("/static", StaticFiles(directory="."), name="static")
|
|
|
|
|
@app.get("/")
async def root():
    """Serve the front-end page.

    Returns:
        A ``FileResponse`` for ``index.html``; the path is relative, so it
        is resolved against the process working directory.
    """
    landing_page = "index.html"
    return FileResponse(landing_page)
|
|
|
|
|
@app.post("/summarize/text")
async def summarize_text(data: dict):
    """Summarize text supplied in a JSON request body.

    Args:
        data: Parsed JSON object; the text to summarize is read from its
            ``"text"`` key.

    Returns:
        ``{"summary": <result of generate_summary(text)>}``.

    Raises:
        HTTPException: 400 when ``"text"`` is missing, is not a string, or
            contains only whitespace; 500 when ``generate_summary`` raises.
    """
    text = data.get("text", "")

    # JSON can carry any type under "text" (number, list, object). Without the
    # isinstance check a truthy non-string would reach ``.strip()`` and raise
    # AttributeError, surfacing as an unhandled 500 instead of a client error.
    if not isinstance(text, str) or not text.strip():
        raise HTTPException(status_code=HTTP_400_BAD_REQUEST, detail="No text provided.")

    # NOTE(review): generate_summary is called synchronously inside an async
    # handler; if it is slow it will block the event loop -- confirm.
    try:
        summary = generate_summary(text)
    except Exception as e:
        logger.exception("Error while generating text summary")
        raise HTTPException(status_code=HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) from e

    return {"summary": summary}
|
|
|
|
|
# Largest PDF upload accepted, in bytes. Overridable through the
# MAX_PDF_BYTES environment variable; defaults to 10 MiB.
MAX_PDF_BYTES = int(os.environ.get("MAX_PDF_BYTES", 10 << 20))
|
|
|
|
|
@app.post("/summarize/pdf")
async def summarize_pdf(file: UploadFile = File(...)):
    """Summarize the text extracted from an uploaded PDF.

    Args:
        file: Multipart upload; its filename must end in ``.pdf``
            (case-insensitive).

    Returns:
        ``{"summary": ...}`` holding either the result of
        ``generate_summary`` or a fixed notice when no text could be
        extracted from the PDF.

    Raises:
        HTTPException: 400 when the filename is missing or not ``.pdf``, the
            upload is empty, or it exceeds ``MAX_PDF_BYTES``; 500 when text
            extraction or summarization raises.
    """
    # The filename is optional on an upload; treat a missing one as "not a
    # PDF" rather than crashing on ``None.lower()``.
    filename = file.filename or ""
    if not filename.lower().endswith(".pdf"):
        raise HTTPException(status_code=HTTP_400_BAD_REQUEST, detail="Only PDF files are supported.")

    # Read at most one byte beyond the limit: enough to detect an oversized
    # upload without loading an arbitrarily large body into memory. The
    # max(..., 0) keeps the read size positive if the limit is misconfigured.
    pdf_bytes = await file.read(max(MAX_PDF_BYTES, 0) + 1)

    if not pdf_bytes:
        raise HTTPException(status_code=HTTP_400_BAD_REQUEST, detail="Uploaded PDF is empty.")
    if len(pdf_bytes) > MAX_PDF_BYTES:
        raise HTTPException(status_code=HTTP_400_BAD_REQUEST, detail=f"PDF too large (max {MAX_PDF_BYTES} bytes).")

    # NOTE(review): extraction and summarization run synchronously inside an
    # async handler; if they are slow they will block the event loop -- confirm.
    try:
        text = extract_text_from_pdf(pdf_bytes)
        if not text or not text.strip():
            # No extractable text is reported as a normal (200) response.
            return {"summary": "PDF is empty or could not be processed."}
        summary = generate_summary(text)
        return {"summary": summary}
    except Exception as e:
        logger.exception("Error while processing PDF")
        raise HTTPException(status_code=HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) from e
|
|
|
|
|
|
|
|
@app.exception_handler(HTTPException)
async def http_exception_handler(request, exc: HTTPException):
    """Render an ``HTTPException`` as ``{"error": <detail>}`` JSON.

    Args:
        request: The incoming request (unused).
        exc: The exception raised by a route handler.

    Returns:
        A ``JSONResponse`` carrying the exception's status code, its detail
        under the ``"error"`` key, and any headers attached to the exception.
    """
    return JSONResponse(
        status_code=exc.status_code,
        content={"error": exc.detail},
        # Forward headers set on the exception (e.g. WWW-Authenticate);
        # previously they were silently dropped.
        headers=getattr(exc, "headers", None),
    )
|
|
|