yukee1992 committed on
Commit
8ae56b5
·
verified ·
1 Parent(s): a70d906

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -76
app.py CHANGED
@@ -3,9 +3,11 @@ import uuid
3
  import httpx
4
  import torch
5
  import logging
 
6
  from typing import Dict, Optional, List, Union
7
- from fastapi import FastAPI, Request, BackgroundTasks, HTTPException
8
  from fastapi.responses import JSONResponse
 
9
  from transformers import AutoTokenizer, AutoModelForCausalLM
10
  import uvicorn
11
  from contextlib import asynccontextmanager
@@ -13,6 +15,7 @@ from contextlib import asynccontextmanager
13
  # Configuration
14
  MODEL_ID = "google/gemma-1.1-2b-it"
15
  HF_TOKEN = os.getenv("HF_TOKEN", "")
 
16
  MAX_TOKENS = 150
17
  DEVICE = "cpu"
18
  PORT = int(os.getenv("PORT", 7860))
@@ -24,6 +27,9 @@ logging.basicConfig(
24
  )
25
  logger = logging.getLogger(__name__)
26
 
 
 
 
27
  # Job storage
28
  jobs: Dict[str, dict] = {}
29
 
@@ -40,24 +46,18 @@ class ScriptGenerator:
40
 
41
  logger.info("Loading model...")
42
  try:
43
- # Load tokenizer first
44
- self.tokenizer = AutoTokenizer.from_pretrained(
45
- MODEL_ID,
46
- token=HF_TOKEN
47
- )
48
  logger.info("βœ… Tokenizer loaded")
49
 
50
- # Load model with simple configuration
51
  self.model = AutoModelForCausalLM.from_pretrained(
52
  MODEL_ID,
53
  torch_dtype=torch.float32,
54
  token=HF_TOKEN,
55
- device_map=None # Explicitly set to None
56
  )
57
 
58
- # Move to device
59
  self.model = self.model.to(DEVICE)
60
- self.model.eval() # Set to evaluation mode
61
 
62
  self.loaded = True
63
  logger.info("βœ… Model loaded successfully")
@@ -65,24 +65,39 @@ class ScriptGenerator:
65
 
66
  except Exception as e:
67
  self.load_error = str(e)
68
- logger.error(f"❌ Model loading failed: {str(e)}", exc_info=True)
69
  return False
70
 
71
  # Global generator instance
72
  generator = ScriptGenerator()
73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  @asynccontextmanager
75
  async def lifespan(app: FastAPI):
76
- # Load model during startup
77
- success = generator.load_model()
78
- if not success:
79
- logger.critical("❌ Failed to load model during startup!")
80
  yield
81
 
82
  app = FastAPI(lifespan=lifespan)
83
 
84
  def extract_topic(topic_input: Union[str, List[str]]) -> str:
85
- """Extract topic from string or array input"""
86
  if isinstance(topic_input, list):
87
  if topic_input:
88
  return str(topic_input[0])
@@ -90,9 +105,7 @@ def extract_topic(topic_input: Union[str, List[str]]) -> str:
90
  return str(topic_input)
91
 
92
  def generate_script(topic: str) -> str:
93
- """Generate script with error handling"""
94
  try:
95
- # Check if model is loaded
96
  if not generator.loaded:
97
  if not generator.load_model():
98
  raise Exception(f"Model failed to load: {generator.load_error}")
@@ -102,13 +115,9 @@ def generate_script(topic: str) -> str:
102
 
103
  prompt = (
104
  f"Create a 60-second video script about: {clean_topic[:50]}\n\n"
105
- "1) Hook (10s)\n"
106
- "2) Content (40s)\n"
107
- "3) CTA (10s)\n\n"
108
- "Script:"
109
  )
110
 
111
- # Tokenize input
112
  inputs = generator.tokenizer(
113
  prompt,
114
  return_tensors="pt",
@@ -116,10 +125,8 @@ def generate_script(topic: str) -> str:
116
  max_length=256
117
  )
118
 
119
- # Move to device
120
  inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
121
 
122
- # Generate text
123
  with torch.no_grad():
124
  outputs = generator.model.generate(
125
  **inputs,
@@ -128,10 +135,8 @@ def generate_script(topic: str) -> str:
128
  top_p=0.9,
129
  temperature=0.7,
130
  pad_token_id=generator.tokenizer.eos_token_id,
131
- num_return_sequences=1
132
  )
133
 
134
- # Decode output
135
  script = generator.tokenizer.decode(outputs[0], skip_special_tokens=True)
136
  clean_script = script.replace(prompt, "").strip()
137
 
@@ -142,11 +147,10 @@ def generate_script(topic: str) -> str:
142
  return clean_script
143
 
144
  except Exception as e:
145
- logger.error(f"❌ Script generation failed: {str(e)}", exc_info=True)
146
  raise
147
 
148
  async def process_job(job_id: str, topic_input: Union[str, List[str]], callback_url: str = None):
149
- """Background task to process job"""
150
  try:
151
  topic = extract_topic(topic_input)
152
  logger.info(f"🎯 Processing: '{topic}'")
@@ -189,8 +193,12 @@ async def process_job(job_id: str, topic_input: Union[str, List[str]], callback_
189
  }
190
 
191
  @app.post("/api/submit")
192
- async def submit_job(request: Request, background_tasks: BackgroundTasks):
193
- """Endpoint to submit new job"""
 
 
 
 
194
  try:
195
  data = await request.json()
196
  job_id = str(uuid.uuid4())
@@ -228,49 +236,31 @@ async def submit_job(request: Request, background_tasks: BackgroundTasks):
228
  raise HTTPException(status_code=400, detail=str(e))
229
 
230
  @app.get("/api/status/{job_id}")
231
- async def get_status(job_id: str):
232
  """Check job status"""
233
  if job_id not in jobs:
234
  raise HTTPException(status_code=404, detail="Job not found")
235
  return jobs[job_id]
236
 
237
- @app.get("/debug/jobs")
238
- async def debug_jobs():
239
- """Debug endpoint to check all jobs"""
240
- return {
241
- "total_jobs": len(jobs),
242
- "jobs": {
243
- job_id: {
244
- "status": data["status"],
245
- "topic": data.get("topic", "unknown"),
246
- "script_length": data.get("script_length", 0),
247
- "error": data.get("error", "none")
248
- }
249
- for job_id, data in jobs.items()
250
- }
251
- }
252
-
253
  @app.get("/health")
254
- async def health_check():
255
- """Health check endpoint"""
 
256
  return {
257
- "status": "healthy" if generator.loaded else "unhealthy",
258
  "model_loaded": generator.loaded,
259
- "model_error": generator.load_error,
260
- "total_jobs": len(jobs)
261
  }
262
 
263
  @app.get("/test/generation")
264
- async def test_generation():
265
- """Test script generation"""
 
266
  try:
267
- # Check if model is loaded first
268
  if not generator.loaded:
269
  if not generator.load_model():
270
- return {
271
- "status": "error",
272
- "error": f"Model failed to load: {generator.load_error}"
273
- }
274
 
275
  test_topic = "healthy lifestyle"
276
  logger.info(f"πŸ§ͺ Testing generation with: {test_topic}")
@@ -285,23 +275,12 @@ async def test_generation():
285
  }
286
 
287
  except Exception as e:
288
- logger.error(f"❌ Test generation failed: {str(e)}", exc_info=True)
289
- return {
290
- "status": "error",
291
- "error": str(e),
292
- "model_loaded": generator.loaded,
293
- "model_error": generator.load_error
294
- }
295
 
296
- @app.get("/test/model")
297
- async def test_model():
298
- """Test if model loads correctly"""
299
- return {
300
- "model_loaded": generator.loaded,
301
- "model_error": generator.load_error,
302
- "has_tokenizer": generator.tokenizer is not None,
303
- "has_model": generator.model is not None
304
- }
305
 
306
  if __name__ == "__main__":
307
  uvicorn.run(
 
3
  import httpx
4
  import torch
5
  import logging
6
+ import time
7
  from typing import Dict, Optional, List, Union
8
+ from fastapi import FastAPI, Request, BackgroundTasks, HTTPException, Depends
9
  from fastapi.responses import JSONResponse
10
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
11
  from transformers import AutoTokenizer, AutoModelForCausalLM
12
  import uvicorn
13
  from contextlib import asynccontextmanager
 
15
  # Configuration
16
  MODEL_ID = "google/gemma-1.1-2b-it"
17
  HF_TOKEN = os.getenv("HF_TOKEN", "")
18
+ API_KEY = os.getenv("API_KEY", "default-key-123")
19
  MAX_TOKENS = 150
20
  DEVICE = "cpu"
21
  PORT = int(os.getenv("PORT", 7860))
 
27
  )
28
  logger = logging.getLogger(__name__)
29
 
30
+ # Security
31
+ security = HTTPBearer()
32
+
33
  # Job storage
34
  jobs: Dict[str, dict] = {}
35
 
 
46
 
47
  logger.info("Loading model...")
48
  try:
49
+ self.tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
 
 
 
 
50
  logger.info("βœ… Tokenizer loaded")
51
 
 
52
  self.model = AutoModelForCausalLM.from_pretrained(
53
  MODEL_ID,
54
  torch_dtype=torch.float32,
55
  token=HF_TOKEN,
56
+ device_map=None
57
  )
58
 
 
59
  self.model = self.model.to(DEVICE)
60
+ self.model.eval()
61
 
62
  self.loaded = True
63
  logger.info("βœ… Model loaded successfully")
 
65
 
66
  except Exception as e:
67
  self.load_error = str(e)
68
+ logger.error(f"❌ Model loading failed: {str(e)}")
69
  return False
70
 
71
  # Global generator instance
72
  generator = ScriptGenerator()
73
 
74
+ async def verify_api_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
75
+ """Verify API key - but allow Hugging Face monitoring"""
76
+ # Allow internal Hugging Face IPs without API key for health checks
77
+ # This prevents the constant model generation from their monitoring
78
+ if credentials.credentials != API_KEY:
79
+ # Check if this is likely Hugging Face internal monitoring
80
+ # (you can add more sophisticated checks here if needed)
81
+ raise HTTPException(status_code=401, detail="Invalid API key")
82
+ return True
83
+
84
+ def is_huggingface_monitoring(request: Request) -> bool:
85
+ """Check if request is from Hugging Face monitoring"""
86
+ client_host = request.client.host
87
+ # Hugging Face internal IP ranges
88
+ hf_ips = ["10.16.", "10.20.", "10.24."]
89
+ return any(client_host.startswith(ip) for ip in hf_ips)
90
+
91
  @asynccontextmanager
92
  async def lifespan(app: FastAPI):
93
+ # Load model but don't block startup
94
+ # Model will load on first real request
95
+ logger.info("πŸš€ API Server starting up...")
 
96
  yield
97
 
98
  app = FastAPI(lifespan=lifespan)
99
 
100
  def extract_topic(topic_input: Union[str, List[str]]) -> str:
 
101
  if isinstance(topic_input, list):
102
  if topic_input:
103
  return str(topic_input[0])
 
105
  return str(topic_input)
106
 
107
  def generate_script(topic: str) -> str:
 
108
  try:
 
109
  if not generator.loaded:
110
  if not generator.load_model():
111
  raise Exception(f"Model failed to load: {generator.load_error}")
 
115
 
116
  prompt = (
117
  f"Create a 60-second video script about: {clean_topic[:50]}\n\n"
118
+ "1) Hook (10s)\n2) Content (40s)\n3) CTA (10s)\n\nScript:"
 
 
 
119
  )
120
 
 
121
  inputs = generator.tokenizer(
122
  prompt,
123
  return_tensors="pt",
 
125
  max_length=256
126
  )
127
 
 
128
  inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
129
 
 
130
  with torch.no_grad():
131
  outputs = generator.model.generate(
132
  **inputs,
 
135
  top_p=0.9,
136
  temperature=0.7,
137
  pad_token_id=generator.tokenizer.eos_token_id,
 
138
  )
139
 
 
140
  script = generator.tokenizer.decode(outputs[0], skip_special_tokens=True)
141
  clean_script = script.replace(prompt, "").strip()
142
 
 
147
  return clean_script
148
 
149
  except Exception as e:
150
+ logger.error(f"❌ Script generation failed: {str(e)}")
151
  raise
152
 
153
  async def process_job(job_id: str, topic_input: Union[str, List[str]], callback_url: str = None):
 
154
  try:
155
  topic = extract_topic(topic_input)
156
  logger.info(f"🎯 Processing: '{topic}'")
 
193
  }
194
 
195
  @app.post("/api/submit")
196
+ async def submit_job(
197
+ request: Request,
198
+ background_tasks: BackgroundTasks,
199
+ auth: bool = Depends(verify_api_key)
200
+ ):
201
+ """Main endpoint for script generation"""
202
  try:
203
  data = await request.json()
204
  job_id = str(uuid.uuid4())
 
236
  raise HTTPException(status_code=400, detail=str(e))
237
 
238
  @app.get("/api/status/{job_id}")
239
+ async def get_status(job_id: str, auth: bool = Depends(verify_api_key)):
240
  """Check job status"""
241
  if job_id not in jobs:
242
  raise HTTPException(status_code=404, detail="Job not found")
243
  return jobs[job_id]
244
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
  @app.get("/health")
246
+ async def health_check(request: Request):
247
+ """Health check endpoint - lightweight for monitoring"""
248
+ # Return immediate response without model loading for monitoring
249
  return {
250
+ "status": "healthy",
251
  "model_loaded": generator.loaded,
252
+ "total_jobs": len(jobs),
253
+ "monitoring": is_huggingface_monitoring(request)
254
  }
255
 
256
  @app.get("/test/generation")
257
+ async def test_generation(request: Request, auth: bool = Depends(verify_api_key)):
258
+ """Test endpoint - only works with API key"""
259
+ # This won't be triggered by HF monitoring because it requires API key
260
  try:
 
261
  if not generator.loaded:
262
  if not generator.load_model():
263
+ return {"status": "error", "error": "Model failed to load"}
 
 
 
264
 
265
  test_topic = "healthy lifestyle"
266
  logger.info(f"πŸ§ͺ Testing generation with: {test_topic}")
 
275
  }
276
 
277
  except Exception as e:
278
+ logger.error(f"❌ Test generation failed: {str(e)}")
279
+ return {"status": "error", "error": str(e)}
 
 
 
 
 
280
 
281
+ # Remove public debug endpoints that were causing the issue
282
+ # @app.get("/debug/jobs") - REMOVED
283
+ # @app.get("/test/model") - REMOVED
 
 
 
 
 
 
284
 
285
  if __name__ == "__main__":
286
  uvicorn.run(