# elite-engine / main.py — by Akshat1414 (commit 1a587d4, verified)
import io
import os
import uvicorn
import fitz # PyMuPDF
import pytesseract
from PIL import Image
from fastapi import FastAPI, Request, UploadFile, File, HTTPException
from fastapi.templating import Jinja2Templates
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
from contextlib import asynccontextmanager
# --- Global AI Handler ---
# Holds the loaded Llama instance; stays None if startup fails (OCR-only mode).
llm = None


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Download and load the GGUF model once at startup.

    On any failure the app still boots, but ``llm`` remains ``None`` so the
    /generate endpoint can report that the AI engine is offline.
    """
    global llm
    print("🚀 Cloud Startup: Preparing AI Engine...")
    try:
        # Fetch the 135M model first — small enough for the free CPU tier.
        gguf_path = hf_hub_download(
            repo_id="bartowski/SmolLM2-135M-Instruct-GGUF",
            filename="SmolLM2-135M-Instruct-Q8_0.gguf",
        )
        # Load into memory with settings tuned for shared cloud CPUs.
        llm = Llama(
            model_path=gguf_path,
            n_ctx=2048,
            n_threads=2,
            n_batch=512,
        )
        print("✅ AI Engine loaded successfully!")
    except Exception as e:
        # Fallback: app starts, but AI summaries won't work.
        print(f"⚠️ AI Startup Failed: {e}. The app will run in OCR-only mode.")
        llm = None
    yield
# FastAPI app wired to the lifespan handler above so the model loads at startup.
app = FastAPI(lifespan=lifespan)
# Jinja2 renderer; index.html is expected under ./templates
templates = Jinja2Templates(directory="templates")
# --- Deep Parsing Logic (OCR) ---
def deep_document_parse(file_bytes, extension):
    """Extract text from an uploaded document.

    Args:
        file_bytes: Raw bytes of the uploaded file.
        extension: Lowercased extension including the dot (e.g. ".pdf").

    Returns:
        Extracted text. PDF pages are concatenated with "[PAGE n]" headers;
        unsupported extensions yield "". On any parsing failure, a string
        starting with "Error parsing file:" is returned instead of raising.
    """
    text_content = []
    try:
        if extension == ".pdf":
            # Fix: use the context manager so the PyMuPDF document handle is
            # always released (the original leaked one handle per request).
            with fitz.open(stream=file_bytes, filetype="pdf") as doc:
                for page_num, page in enumerate(doc):
                    page_text = page.get_text().strip()
                    # A near-empty text layer (< 50 chars) suggests a scanned
                    # page: rasterize at 300 DPI and OCR it instead.
                    if len(page_text) < 50:
                        pix = page.get_pixmap(dpi=300)
                        img = Image.open(io.BytesIO(pix.tobytes("png")))
                        page_text = pytesseract.image_to_string(img)
                    text_content.append(f"[PAGE {page_num + 1}]\n{page_text}")
        elif extension in [".png", ".jpg", ".jpeg"]:
            img = Image.open(io.BytesIO(file_bytes))
            text_content.append(pytesseract.image_to_string(img))
    except Exception as e:
        return f"Error parsing file: {str(e)}"
    return "\n\n".join(text_content)
# --- Endpoints ---
@app.get("/")
async def home(request: Request):
    """Serve the landing page rendered from templates/index.html."""
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)
@app.post("/process-file")
async def process_file(file: UploadFile = File(...)):
    """Accept an uploaded file and return its extracted text as JSON.

    Response shape: {"extracted_text": <str>} — see deep_document_parse for
    the extraction/OCR behavior per file type.
    """
    file_bytes = await file.read()
    # Fix: os.path.splitext handles dotless filenames ("README" -> "") and a
    # missing filename; the old split(".")[-1] produced bogus extensions like
    # ".readme" and crashed when filename was None.
    extension = os.path.splitext(file.filename or "")[1].lower()
    return {"extracted_text": deep_document_parse(file_bytes, extension)}
@app.post("/generate")
async def generate(request: Request):
    """Run the loaded LLM over the posted topic.

    JSON body: {"topic": str, "mode": "explain"|"summarize", "points": int}.
    Returns {"result": <generated text or error message>}.
    """
    # Startup may have failed; report instead of crashing.
    if llm is None:
        return {"result": "⚠️ AI Engine is offline. Please check logs for download errors."}

    payload = await request.json()
    topic = payload.get("topic", "")
    mode = payload.get("mode", "summarize")
    points = payload.get("points", 3)

    if mode == "explain":
        instruction = "Explain this"
    else:
        instruction = f"Summarize into {points} points"

    # ChatML-style prompt expected by SmolLM2-Instruct.
    prompt = (
        f"<|im_start|>system\nYou are an expert assistant. {instruction}.<|im_end|>\n"
        f"<|im_start|>user\n{topic}<|im_end|>\n"
        f"<|im_start|>assistant\n"
    )
    try:
        completion = llm(prompt, max_tokens=500, stop=["<|im_end|>"])
        return {"result": completion["choices"][0]["text"]}
    except Exception as e:
        return {"result": f"AI Generation Error: {str(e)}"}
if __name__ == "__main__":
    # Hugging Face Spaces requires host 0.0.0.0 and (by default) port 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)