# app.py — Hugging Face Spaces entrypoint (FastAPI + llama.cpp + OCR)
import io
import os
from contextlib import asynccontextmanager

import fitz  # PyMuPDF
import pytesseract
import uvicorn
from fastapi import FastAPI, File, HTTPException, Request, UploadFile
from fastapi.templating import Jinja2Templates
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from PIL import Image
# --- Global AI Handler ---
# Populated once by the lifespan hook at startup; stays None when the model
# download/load fails, in which case the app serves OCR-only responses.
llm = None
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Download and load the GGUF model at startup, then yield to the app.

    FastAPI's ``lifespan=`` parameter requires an async context manager;
    the original code imported ``asynccontextmanager`` but never applied
    it, so the app would fail at startup. On any model-load failure the
    app degrades gracefully to OCR-only mode (``llm`` stays ``None``).
    """
    global llm
    print("🚀 Cloud Startup: Preparing AI Engine...")
    try:
        # Step 1: Download the 135M model (optimized for the free CPU
        # tier). hf_hub_download caches the file locally between restarts.
        model_path = hf_hub_download(
            repo_id="bartowski/SmolLM2-135M-Instruct-GGUF",
            filename="SmolLM2-135M-Instruct-Q8_0.gguf",
        )
        # Step 2: Load the model into memory.
        llm = Llama(
            model_path=model_path,
            n_ctx=2048,
            n_threads=2,  # shared cloud CPUs — keep the thread count low
            n_batch=512,
        )
        print("✅ AI Engine loaded successfully!")
    except Exception as e:
        print(f"⚠️ AI Startup Failed: {e}. The app will run in OCR-only mode.")
        llm = None  # Fallback: App starts, but AI summaries won't work
    yield
# Application object wired to the startup hook above; HTML templates are
# served from the local ./templates directory.
app = FastAPI(lifespan=lifespan)
templates = Jinja2Templates(directory="templates")
# --- Deep Parsing Logic (OCR) ---
def deep_document_parse(file_bytes, extension):
    """Extract text from a PDF or image, falling back to OCR.

    PDF pages whose embedded text layer is very short (< 50 chars) are
    treated as scans: the page is rendered at 300 dpi and run through
    Tesseract.

    Args:
        file_bytes: Raw bytes of the uploaded file.
        extension: Lowercase extension including the dot (e.g. ".pdf").

    Returns:
        The extracted text, an empty string for unsupported extensions,
        or an "Error parsing file: ..." message string on failure.
    """
    text_content = []
    try:
        if extension == ".pdf":
            # Use the document as a context manager so the handle is always
            # closed (the original leaked it on every call).
            with fitz.open(stream=file_bytes, filetype="pdf") as doc:
                for page_num, page in enumerate(doc):
                    page_text = page.get_text().strip()
                    # Heuristic: an (almost) empty text layer means the page
                    # is an image — OCR it instead.
                    if len(page_text) < 50:
                        pix = page.get_pixmap(dpi=300)
                        img = Image.open(io.BytesIO(pix.tobytes("png")))
                        page_text = pytesseract.image_to_string(img)
                    text_content.append(f"[PAGE {page_num + 1}]\n{page_text}")
        elif extension in [".png", ".jpg", ".jpeg"]:
            img = Image.open(io.BytesIO(file_bytes))
            text_content.append(pytesseract.image_to_string(img))
    except Exception as e:
        return f"Error parsing file: {str(e)}"
    return "\n\n".join(text_content)
# --- Endpoints ---
@app.get("/")
async def home(request: Request):
    """Serve the landing page.

    NOTE(review): the original file registered no routes at all — every
    endpoint was missing its ``@app`` decorator, so nothing was reachable.
    """
    # Renders index.html from the /templates folder
    return templates.TemplateResponse("index.html", {"request": request})
@app.post("/process")  # NOTE(review): path assumed — confirm against templates/index.html
async def process_file(file: UploadFile = File(...)):
    """Accept an uploaded document and return its extracted text as JSON."""
    file_bytes = await file.read()
    # os.path.splitext is robust to filenames without a dot (the original
    # split produced ".<whole name>" in that case) and to missing filenames.
    extension = os.path.splitext(file.filename or "")[1].lower()
    return {"extracted_text": deep_document_parse(file_bytes, extension)}
@app.post("/generate")  # NOTE(review): path assumed — confirm against templates/index.html
async def generate(request: Request):
    """Run the LLM over the posted topic in "summarize" or "explain" mode.

    Expects a JSON body with optional keys: ``topic`` (str), ``mode``
    ("summarize" | "explain", default "summarize"), ``points`` (int,
    default 3). Returns ``{"result": <text>}``; never raises — errors are
    reported inside the result string.
    """
    if llm is None:
        return {"result": "⚠️ AI Engine is offline. Please check logs for download errors."}
    data = await request.json()
    topic = data.get("topic", "")
    mode = data.get("mode", "summarize")
    points = data.get("points", 3)
    instruction = "Explain this" if mode == "explain" else f"Summarize into {points} points"
    # ChatML prompt layout used by SmolLM2-Instruct.
    prompt = (
        "<|im_start|>system\n"
        f"You are an expert assistant. {instruction}.<|im_end|>\n"
        f"<|im_start|>user\n{topic}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )
    try:
        response = llm(prompt, max_tokens=500, stop=["<|im_end|>"])
        return {"result": response["choices"][0]["text"]}
    except Exception as e:
        return {"result": f"AI Generation Error: {str(e)}"}
if __name__ == "__main__":
    # Hugging Face Spaces requires binding 0.0.0.0:7860; PORT overrides
    # the port when the platform injects one.
    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))