# SummarizerApp — app/main.py
# feat: Add V4 NDJSON patch-based structured summarization (commit 93c9664)
"""
Main FastAPI application for text summarizer backend.
"""
import os
import time
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from app.api.v1.routes import api_router
from app.api.v2.routes import api_router as v2_api_router
from app.core.config import settings
from app.core.errors import init_exception_handlers
from app.core.logging import get_logger, setup_logging
from app.core.middleware import request_context_middleware
from app.services.hf_streaming_summarizer import hf_streaming_service
from app.services.summarizer import ollama_service
from app.services.transformers_summarizer import transformers_service
# Set up structured logging before anything else so import-time log calls are captured.
setup_logging()
logger = get_logger(__name__)

# Create the FastAPI app. Interactive docs are exposed at /docs (Swagger) and /redoc.
app = FastAPI(
title="Text Summarizer API",
description="A FastAPI backend with multiple summarization engines: V1 (Ollama + Transformers pipeline), V2 (HuggingFace streaming), V3 (Web scraping + Summarization), and V4 (Structured summarization with Phi-3)",
version="4.0.0",
docs_url="/docs",
redoc_url="/redoc",
# Make app aware of reverse-proxy prefix used by HF Spaces (if any);
# defaults to "" so local runs are unaffected.
root_path=os.getenv("HF_SPACE_ROOT_PATH", ""),
)
# Add CORS middleware.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# rejected by browsers per the CORS spec; Starlette works around it by echoing
# the request Origin, which effectively allows credentialed requests from ANY
# origin. Lock this down to an explicit origin list for production.
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],  # Configure appropriately for production
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Add request context middleware (request IDs / per-request logging context).
app.middleware("http")(request_context_middleware)
# Initialize exception handlers (converts app errors into consistent JSON responses).
init_exception_handlers(app)
# Include always-on API routes.
app.include_router(api_router, prefix="/api/v1")
app.include_router(v2_api_router, prefix="/api/v2")
# Conditionally include V3 router; the import is deferred so disabled deploys
# never pay for V3's dependencies.
if settings.enable_v3_scraping:
    from app.api.v3.routes import api_router as v3_api_router
    app.include_router(v3_api_router, prefix="/api/v3")
    logger.info("✅ V3 Web Scraping API enabled")
else:
    logger.info("⏭️ V3 Web Scraping API disabled")
# Conditionally include V4 router (same deferred-import pattern as V3).
if settings.enable_v4_structured:
    from app.api.v4.routes import api_router as v4_api_router
    app.include_router(v4_api_router, prefix="/api/v4")
    logger.info("✅ V4 Structured Summarization API enabled")
else:
    logger.info("⏭️ V4 Structured Summarization API disabled")
async def _warmup_v1_ollama() -> None:
    """Check Ollama connectivity and warm up the V1 model (best-effort).

    Failures are logged but never raised: the API should still start even
    when Ollama is down, so V1 endpoints can recover once it comes back.
    """
    logger.info(f"Ollama host: {settings.ollama_host}")
    logger.info(f"Ollama model: {settings.ollama_model}")
    # Validate Ollama connectivity before spending time on warmup.
    try:
        is_healthy = await ollama_service.check_health()
        if is_healthy:
            logger.info("✅ Ollama service is accessible and healthy")
        else:
            logger.warning("⚠️ Ollama service is not responding properly")
            logger.warning(
                f"   Please ensure Ollama is running at {settings.ollama_host}"
            )
            logger.warning(
                f"   And that model '{settings.ollama_model}' is available"
            )
    except Exception as e:
        logger.error(f"❌ Failed to connect to Ollama: {e}")
        logger.error(
            f"   Please check that Ollama is running at {settings.ollama_host}"
        )
        logger.error(f"   And that model '{settings.ollama_model}' is installed")
    # Warm up the Ollama model so the first user request isn't slow.
    logger.info("🔥 Warming up Ollama model...")
    try:
        # perf_counter is monotonic — immune to wall-clock adjustments.
        warmup_start = time.perf_counter()
        await ollama_service.warm_up_model()
        warmup_time = time.perf_counter() - warmup_start
        logger.info(f"✅ Ollama model warmup completed in {warmup_time:.2f}s")
    except Exception as e:
        logger.warning(f"⚠️ Ollama model warmup failed: {e}")


async def _warmup_v1_pipeline() -> None:
    """Warm up the V1 Transformers pipeline model (best-effort)."""
    logger.info("🔥 Warming up Transformers pipeline model...")
    try:
        pipeline_start = time.perf_counter()
        await transformers_service.warm_up_model()
        pipeline_time = time.perf_counter() - pipeline_start
        logger.info(f"✅ Pipeline warmup completed in {pipeline_time:.2f}s")
    except Exception as e:
        logger.warning(f"⚠️ Pipeline warmup failed: {e}")


async def _warmup_v2_hf() -> None:
    """Warm up the V2 HuggingFace streaming model (best-effort)."""
    logger.info(f"HuggingFace model: {settings.hf_model_id}")
    logger.info("🔥 Warming up HuggingFace model...")
    try:
        hf_start = time.perf_counter()
        await hf_streaming_service.warm_up_model()
        hf_time = time.perf_counter() - hf_start
        logger.info(f"✅ HuggingFace model warmup completed in {hf_time:.2f}s")
    except Exception as e:
        logger.warning(f"⚠️ HuggingFace model warmup failed: {e}")
        logger.warning(
            "V2 endpoints will be disabled until model loads successfully"
        )


def _log_v3_config() -> None:
    """Log the effective V3 web-scraping configuration."""
    logger.info(f"V3 scraping timeout: {settings.scraping_timeout}s")
    logger.info(f"V3 cache enabled: {settings.scraping_cache_enabled}")
    if settings.scraping_cache_enabled:
        logger.info(f"V3 cache TTL: {settings.scraping_cache_ttl}s")


async def _warmup_v4() -> None:
    """Log V4 config and optionally warm up the Phi-3 model (best-effort).

    The service import is deferred so the (large) model stack is only loaded
    when warmup is actually requested.
    """
    logger.info(f"V4 warmup enabled: {settings.enable_v4_warmup}")
    logger.info(f"V4 model: {settings.v4_model_id}")
    if settings.enable_v4_warmup:
        from app.services.structured_summarizer import structured_summarizer_service
        logger.info("🔥 Warming up V4 Phi-3 model (this may take 30-60s)...")
        try:
            v4_start = time.perf_counter()
            await structured_summarizer_service.warm_up_model()
            v4_time = time.perf_counter() - v4_start
            logger.info(f"✅ V4 model warmup completed in {v4_time:.2f}s")
        except Exception as e:
            logger.warning(f"⚠️ V4 model warmup failed: {e}")
            logger.warning("V4 endpoints will be slower on first request")
    else:
        logger.info("⏭️ Skipping V4 warmup (disabled to save memory)")


@app.on_event("startup")
async def startup_event():
    """Application startup event: log config and warm up the enabled engines.

    Each warmup is best-effort — failures are logged, never raised — so one
    broken engine cannot prevent the API from serving the others.

    NOTE(review): ``@app.on_event`` is deprecated in current FastAPI in favor
    of lifespan context managers; consider migrating when convenient.
    """
    logger.info("Starting Text Summarizer API")
    logger.info(f"V1 warmup enabled: {settings.enable_v1_warmup}")
    logger.info(f"V2 warmup enabled: {settings.enable_v2_warmup}")
    logger.info(f"V3 scraping enabled: {settings.enable_v3_scraping}")
    logger.info(f"V4 structured enabled: {settings.enable_v4_structured}")
    # V1 Ollama warmup (conditional)
    if settings.enable_v1_warmup:
        await _warmup_v1_ollama()
    else:
        logger.info("⏭️ Skipping V1 Ollama warmup (disabled)")
    # V1 Transformers pipeline warmup (always enabled for backward compatibility)
    await _warmup_v1_pipeline()
    # V2 HuggingFace warmup (conditional)
    if settings.enable_v2_warmup:
        await _warmup_v2_hf()
    else:
        logger.info("⏭️ Skipping V2 HuggingFace warmup (disabled)")
    # V3 scraping service info (no model to warm — config logging only)
    if settings.enable_v3_scraping:
        _log_v3_config()
    # V4 structured summarization warmup (conditional)
    if settings.enable_v4_structured:
        await _warmup_v4()
@app.on_event("shutdown")
async def shutdown_event():
    """Application shutdown event: record that the service is going down."""
    # No resources to tear down here; services manage their own lifecycles.
    logger.info("Shutting down Text Summarizer API")
@app.get("/")
async def root():
    """Root endpoint: service banner, docs link, and mounted API versions."""
    # Optional versions report None when their feature flag is off.
    endpoints = {
        "v1": "/api/v1",
        "v2": "/api/v2",
        "v3": "/api/v3" if settings.enable_v3_scraping else None,
        "v4": "/api/v4" if settings.enable_v4_structured else None,
    }
    return {
        "message": "Text Summarizer API",
        "version": "4.0.0",
        "docs": "/docs",
        "endpoints": endpoints,
    }
@app.get("/health")
async def health_check():
    """Liveness probe: static payload, performs no downstream checks."""
    payload = {
        "status": "ok",
        "service": "text-summarizer-api",
        "version": "4.0.0",
    }
    return payload
@app.get("/debug/config")
async def debug_config():
    """Debug endpoint to show current configuration.

    Settings gated behind a disabled feature flag are reported as None.
    """
    v3_enabled = settings.enable_v3_scraping
    v4_enabled = settings.enable_v4_structured
    return {
        "ollama_host": settings.ollama_host,
        "ollama_model": settings.ollama_model,
        "ollama_timeout": settings.ollama_timeout,
        "server_host": settings.server_host,
        "server_port": settings.server_port,
        "hf_model_id": settings.hf_model_id,
        "hf_device_map": settings.hf_device_map,
        "enable_v1_warmup": settings.enable_v1_warmup,
        "enable_v2_warmup": settings.enable_v2_warmup,
        "enable_v3_scraping": v3_enabled,
        "scraping_timeout": settings.scraping_timeout if v3_enabled else None,
        "scraping_cache_enabled": settings.scraping_cache_enabled if v3_enabled else None,
        "enable_v4_structured": v4_enabled,
        "enable_v4_warmup": settings.enable_v4_warmup if v4_enabled else None,
        "v4_model_id": settings.v4_model_id if v4_enabled else None,
        "v4_max_tokens": settings.v4_max_tokens if v4_enabled else None,
    }
if __name__ == "__main__":
    # Local/dev runner. On HF Spaces, the platform will spawn uvicorn for main:app,
    # but this keeps behavior consistent if launched manually.
    # reload=False: hot-reload is unnecessary (and costly) outside development tooling.
    import uvicorn
    uvicorn.run("app.main:app", host="0.0.0.0", port=7860, reload=False)