"""
Main FastAPI application for text summarizer backend.
"""
import os
import time
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from app.api.v1.routes import api_router
from app.api.v2.routes import api_router as v2_api_router
from app.core.config import settings
from app.core.errors import init_exception_handlers
from app.core.logging import get_logger, setup_logging
from app.core.middleware import request_context_middleware
from app.services.hf_streaming_summarizer import hf_streaming_service
from app.services.summarizer import ollama_service
from app.services.transformers_summarizer import transformers_service
# Configure structured logging before anything else emits log records.
setup_logging()
logger = get_logger(__name__)

# Create the FastAPI app. V1/V2 routers are always mounted; V3/V4 are
# feature-flagged further below.
app = FastAPI(
    title="Text Summarizer API",
    description="A FastAPI backend with multiple summarization engines: V1 (Ollama + Transformers pipeline), V2 (HuggingFace streaming), V3 (Web scraping + Summarization), and V4 (Structured summarization with Phi-3)",
    version="4.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
    # Make app aware of reverse-proxy prefix used by HF Spaces (if any)
    root_path=os.getenv("HF_SPACE_ROOT_PATH", ""),
)

# Add CORS middleware.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# overly permissive for production — tighten origins before deploying publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Configure appropriately for production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Attach the project's request-context middleware to the HTTP pipeline
# (see app.core.middleware for what it records per request).
app.middleware("http")(request_context_middleware)

# Install the application-wide exception handlers.
init_exception_handlers(app)

# Mount the always-on API versions.
app.include_router(api_router, prefix="/api/v1")
app.include_router(v2_api_router, prefix="/api/v2")

# Conditionally include V3 router. The import is deferred so V3's scraping
# dependencies are only loaded when the feature flag is on.
if settings.enable_v3_scraping:
    from app.api.v3.routes import api_router as v3_api_router
    app.include_router(v3_api_router, prefix="/api/v3")
    logger.info("✅ V3 Web Scraping API enabled")
else:
    logger.info("⏭️ V3 Web Scraping API disabled")

# Conditionally include V4 router (same deferred-import pattern as V3).
if settings.enable_v4_structured:
    from app.api.v4.routes import api_router as v4_api_router
    app.include_router(v4_api_router, prefix="/api/v4")
    logger.info("✅ V4 Structured Summarization API enabled")
else:
    logger.info("⏭️ V4 Structured Summarization API disabled")
async def _warmup_v1_ollama() -> None:
    """Validate Ollama connectivity, then warm up the V1 Ollama model.

    Failures are logged but never raised: the API should still come up even
    when Ollama is unreachable.
    """
    logger.info(f"Ollama host: {settings.ollama_host}")
    logger.info(f"Ollama model: {settings.ollama_model}")
    # Probe connectivity first so a warmup failure below is easier to diagnose.
    try:
        is_healthy = await ollama_service.check_health()
        if is_healthy:
            logger.info("✅ Ollama service is accessible and healthy")
        else:
            logger.warning("⚠️ Ollama service is not responding properly")
            logger.warning(
                f" Please ensure Ollama is running at {settings.ollama_host}"
            )
            logger.warning(
                f" And that model '{settings.ollama_model}' is available"
            )
    except Exception as e:
        logger.error(f"❌ Failed to connect to Ollama: {e}")
        logger.error(
            f" Please check that Ollama is running at {settings.ollama_host}"
        )
        logger.error(f" And that model '{settings.ollama_model}' is installed")
    # Attempt the warmup regardless of the health probe's outcome.
    logger.info("🔥 Warming up Ollama model...")
    try:
        warmup_start = time.time()
        await ollama_service.warm_up_model()
        warmup_time = time.time() - warmup_start
        logger.info(f"✅ Ollama model warmup completed in {warmup_time:.2f}s")
    except Exception as e:
        logger.warning(f"⚠️ Ollama model warmup failed: {e}")


async def _warmup_transformers_pipeline() -> None:
    """Warm up the V1 Transformers pipeline (always runs, for backward compatibility)."""
    logger.info("🔥 Warming up Transformers pipeline model...")
    try:
        pipeline_start = time.time()
        await transformers_service.warm_up_model()
        pipeline_time = time.time() - pipeline_start
        logger.info(f"✅ Pipeline warmup completed in {pipeline_time:.2f}s")
    except Exception as e:
        logger.warning(f"⚠️ Pipeline warmup failed: {e}")


async def _warmup_v2_huggingface() -> None:
    """Warm up the V2 HuggingFace streaming model; a failure only disables V2."""
    logger.info(f"HuggingFace model: {settings.hf_model_id}")
    logger.info("🔥 Warming up HuggingFace model...")
    try:
        hf_start = time.time()
        await hf_streaming_service.warm_up_model()
        hf_time = time.time() - hf_start
        logger.info(f"✅ HuggingFace model warmup completed in {hf_time:.2f}s")
    except Exception as e:
        logger.warning(f"⚠️ HuggingFace model warmup failed: {e}")
        logger.warning(
            "V2 endpoints will be disabled until model loads successfully"
        )


def _log_v3_settings() -> None:
    """Log the effective V3 scraping configuration (V3 needs no warmup)."""
    logger.info(f"V3 scraping timeout: {settings.scraping_timeout}s")
    logger.info(f"V3 cache enabled: {settings.scraping_cache_enabled}")
    if settings.scraping_cache_enabled:
        logger.info(f"V3 cache TTL: {settings.scraping_cache_ttl}s")


async def _warmup_v4_structured() -> None:
    """Warm up the V4 Phi-3 model when the V4 warmup flag allows it."""
    logger.info(f"V4 warmup enabled: {settings.enable_v4_warmup}")
    logger.info(f"V4 model: {settings.v4_model_id}")
    if settings.enable_v4_warmup:
        # Deferred import: the V4 service pulls in a large model stack and
        # should only be loaded when the feature is actually enabled.
        from app.services.structured_summarizer import structured_summarizer_service
        logger.info("🔥 Warming up V4 Phi-3 model (this may take 30-60s)...")
        try:
            v4_start = time.time()
            await structured_summarizer_service.warm_up_model()
            v4_time = time.time() - v4_start
            logger.info(f"✅ V4 model warmup completed in {v4_time:.2f}s")
        except Exception as e:
            logger.warning(f"⚠️ V4 model warmup failed: {e}")
            logger.warning("V4 endpoints will be slower on first request")
    else:
        logger.info("⏭️ Skipping V4 warmup (disabled to save memory)")


# NOTE(review): @app.on_event is deprecated in recent FastAPI in favor of
# lifespan handlers; kept here so the app construction above stays untouched.
@app.on_event("startup")
async def startup_event():
    """Application startup event: log configuration and warm up enabled backends."""
    logger.info("Starting Text Summarizer API")
    logger.info(f"V1 warmup enabled: {settings.enable_v1_warmup}")
    logger.info(f"V2 warmup enabled: {settings.enable_v2_warmup}")
    logger.info(f"V3 scraping enabled: {settings.enable_v3_scraping}")
    logger.info(f"V4 structured enabled: {settings.enable_v4_structured}")
    # V1 Ollama warmup (conditional)
    if settings.enable_v1_warmup:
        await _warmup_v1_ollama()
    else:
        logger.info("⏭️ Skipping V1 Ollama warmup (disabled)")
    # V1 Transformers pipeline warmup (always enabled for backward compatibility)
    await _warmup_transformers_pipeline()
    # V2 HuggingFace warmup (conditional)
    if settings.enable_v2_warmup:
        await _warmup_v2_huggingface()
    else:
        logger.info("⏭️ Skipping V2 HuggingFace warmup (disabled)")
    # V3 scraping service info
    if settings.enable_v3_scraping:
        _log_v3_settings()
    # V4 structured summarization warmup (conditional)
    if settings.enable_v4_structured:
        await _warmup_v4_structured()
@app.on_event("shutdown")
async def shutdown_event():
    """Log that the API process is stopping.

    Only logging happens here: the module-level services manage their own
    resources, so no explicit teardown is performed.
    """
    logger.info("Shutting down Text Summarizer API")
@app.get("/")
async def root():
    """Landing endpoint: report service name, version, docs URL, and API prefixes."""
    # Feature-flagged versions resolve to None when their flag is off, so a
    # client can see at a glance which APIs this deployment exposes.
    endpoints = {
        "v1": "/api/v1",
        "v2": "/api/v2",
        "v3": "/api/v3" if settings.enable_v3_scraping else None,
        "v4": "/api/v4" if settings.enable_v4_structured else None,
    }
    return {
        "message": "Text Summarizer API",
        "version": "4.0.0",
        "docs": "/docs",
        "endpoints": endpoints,
    }
@app.get("/health")
async def health_check():
    """Liveness probe: always reports OK while the process is serving requests."""
    payload = {"status": "ok", "service": "text-summarizer-api", "version": "4.0.0"}
    return payload
@app.get("/debug/config")
async def debug_config():
    """Expose the effective runtime configuration for troubleshooting.

    Settings belonging to a disabled feature (V3/V4) are reported as None so
    the response reflects what is actually active.
    """
    v3_enabled = settings.enable_v3_scraping
    v4_enabled = settings.enable_v4_structured
    config = {
        "ollama_host": settings.ollama_host,
        "ollama_model": settings.ollama_model,
        "ollama_timeout": settings.ollama_timeout,
        "server_host": settings.server_host,
        "server_port": settings.server_port,
        "hf_model_id": settings.hf_model_id,
        "hf_device_map": settings.hf_device_map,
        "enable_v1_warmup": settings.enable_v1_warmup,
        "enable_v2_warmup": settings.enable_v2_warmup,
        "enable_v3_scraping": v3_enabled,
        "scraping_timeout": settings.scraping_timeout if v3_enabled else None,
        "scraping_cache_enabled": settings.scraping_cache_enabled if v3_enabled else None,
        "enable_v4_structured": v4_enabled,
        "enable_v4_warmup": settings.enable_v4_warmup if v4_enabled else None,
        "v4_model_id": settings.v4_model_id if v4_enabled else None,
        "v4_max_tokens": settings.v4_max_tokens if v4_enabled else None,
    }
    return config
if __name__ == "__main__":
    # Local/dev runner. On HF Spaces, the platform will spawn uvicorn for main:app,
    # but this keeps behavior consistent if launched manually.
    # Port 7860 matches the HF Spaces convention; host 0.0.0.0 makes the server
    # reachable from outside a container; reload=False mirrors production behavior.
    import uvicorn
    uvicorn.run("app.main:app", host="0.0.0.0", port=7860, reload=False)