from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from contextlib import asynccontextmanager
import re
import os
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
try:
    from llama_cpp import Llama
except ImportError:
    raise ImportError("Install llama-cpp-python: pip install llama-cpp-python")
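# Dependencies assumed by this file (versions unpinned): fastapi, uvicorn,
# pydantic, and llama-cpp-python.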
MODEL_REPO = "bartowski/Phi-3.5-mini-instruct-GGUF"
MODEL_FILE = "Phi-3.5-mini-instruct-Q4_K_M.gguf"

llm = None
model_loading = False
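# Module-level state: llm holds the loaded model once lifespan() finishes;
# model_loading lets the health endpoints distinguish "still starting"
# from "failed to load".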
@asynccontextmanager
async def lifespan(app: FastAPI):
    global llm, model_loading
    try:
        logger.info("Starting model load...")
        model_loading = True
        # Cache directory for Hugging Face Spaces: HF_HOME is set on Spaces,
        # "./models" is the local fallback
        cache_dir = os.getenv("HF_HOME", "./models")
        llm = Llama.from_pretrained(
            repo_id=MODEL_REPO,
            filename=MODEL_FILE,
            cache_dir=cache_dir,  # assumes a llama-cpp-python version whose from_pretrained accepts cache_dir
            n_threads=4,     # CPU threads used for inference
            n_ctx=2048,      # context window in tokens
            n_batch=256,     # prompt-processing batch size
            n_gpu_layers=0,  # CPU-only inference
            verbose=False,
        )
        model_loading = False
        logger.info("Model loaded and ready")
    except Exception as e:
        logger.error(f"Model load error: {e}")
        model_loading = False
        llm = None
    yield
    logger.info("Shutting down...")
    if llm:
        del llm
app = FastAPI(
    title="AI Summarizer",
    description="Fast & Accurate AI Text Summarizer",
    version="1.0",
    lifespan=lifespan,
)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
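# Note: allow_origins=["*"] accepts browser requests from any origin, which
# suits a public demo. A locked-down deployment would list explicit origins
# instead, e.g. allow_origins=["https://example.com"] (hypothetical domain).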
class SummarizeRequest(BaseModel):
    text: str = Field(..., min_length=1, max_length=2000)
    length: str = Field(default="short", pattern="^(short|medium|long)$")
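# FastAPI validates request bodies against this model automatically: a payload
# with an unknown length value or text over 2000 characters is rejected with
# a 422 before summarize() ever runs.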
LENGTH_INSTRUCTIONS = {
    "short": "Summarize in 2-3 concise sentences.",
    "medium": "Summarize in 4-5 clear sentences.",
    "long": "Summarize in a detailed paragraph.",
}
def clean_output(text: str) -> str:
    """Strip special tokens from model output and normalize whitespace."""
    text = re.sub(r"<\|.*?\|>", "", text)  # remove <|...|> control tokens
    text = re.sub(r"\s+", " ", text)       # collapse runs of whitespace
    return text.strip()
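# Illustrative example of what clean_output does:
#   clean_output("A summary.<|end|>  trailing   text") -> "A summary. trailing text"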
@app.get("/")
def root():
    """Root endpoint - returns status"""
    return {
        "status": "healthy",
        "model_loaded": llm is not None,
        "model_loading": model_loading,
        "message": "AI Summarizer API is running",
    }
@app.get("/health")  # route path assumed; the original omits the decorator
def health():
    """Health check endpoint for container orchestration"""
    if model_loading:
        return {
            "status": "starting",
            "model_loaded": False,
            "model_loading": True,
            "message": "Model is loading, please wait...",
        }
    if llm is None:
        return {
            "status": "unhealthy",
            "model_loaded": False,
            "model_loading": False,
            "message": "Model failed to load",
        }
    return {
        "status": "healthy",
        "model_loaded": True,
        "model_loading": False,
        "model_name": MODEL_FILE,
        "message": "Ready to summarize",
    }
@app.get("/ready")  # route path assumed; the original omits the decorator
def readiness():
    """Readiness probe - returns 200 only when model is loaded"""
    if llm is not None and not model_loading:
        return {"ready": True}
    raise HTTPException(status_code=503, detail="Model not ready")
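# Probe semantics: /health always answers 200 with a status payload so a
# dashboard can inspect state, while /ready answers 503 until the model is
# loaded so an orchestrator can hold traffic back during startup.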
@app.post("/summarize")  # route path assumed; the original omits the decorator
async def summarize(req: SummarizeRequest):
    if model_loading:
        raise HTTPException(
            status_code=503,
            detail="Model is still loading. Please wait and try again.",
        )
    if llm is None:
        raise HTTPException(
            status_code=503,
            detail="Model not loaded. Check server logs.",
        )
    try:
        text = req.text.strip()
        length_instruction = LENGTH_INSTRUCTIONS.get(
            req.length,
            LENGTH_INSTRUCTIONS["short"],
        )
        # Phi-3.5 chat format: <|user|> ... <|end|> followed by <|assistant|>
        prompt = f"""<|user|>
You are an expert text summarizer.
{length_instruction}
Text:
{text}
<|end|>
<|assistant|>"""
        # Token budget per requested summary length
        max_tokens_map = {
            "short": 140,
            "medium": 220,
            "long": 300,
        }
        logger.info(f"Summarizing text (length: {req.length})")
        output = llm(
            prompt,
            max_tokens=max_tokens_map.get(req.length, 140),
            temperature=0.3,  # low temperature for focused, factual output
            top_p=0.9,
            top_k=40,
            repeat_penalty=1.05,
            stop=["<|end|>", "<|user|>"],
            echo=False,  # do not include the prompt in the returned text
        )
        summary = clean_output(output["choices"][0]["text"])
        if not summary:
            raise HTTPException(
                status_code=500,
                detail="Model produced empty output",
            )
        logger.info("Summary generated successfully")
        return {
            "summary": summary,
            "success": True,
            "length": req.length,
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Summarization error: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Summarization error: {str(e)}",
        )
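# Example request against a local run (path /summarize as assumed above):
#   curl -X POST http://localhost:7860/summarize \
#     -H "Content-Type: application/json" \
#     -d '{"text": "Long article text ...", "length": "short"}'
# Expected response shape: {"summary": "...", "success": true, "length": "short"}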
if __name__ == "__main__":
    import uvicorn

    # Use PORT environment variable for Hugging Face Spaces
    port = int(os.getenv("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=port)