yukee1992 committed on
Commit
80aadbe
·
verified ·
1 Parent(s): 8ae56b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -46
app.py CHANGED
@@ -3,7 +3,7 @@ import uuid
3
  import httpx
4
  import torch
5
  import logging
6
- import time
7
  from typing import Dict, Optional, List, Union
8
  from fastapi import FastAPI, Request, BackgroundTasks, HTTPException, Depends
9
  from fastapi.responses import JSONResponse
@@ -16,7 +16,7 @@ from contextlib import asynccontextmanager
16
  MODEL_ID = "google/gemma-1.1-2b-it"
17
  HF_TOKEN = os.getenv("HF_TOKEN", "")
18
  API_KEY = os.getenv("API_KEY", "default-key-123")
19
- MAX_TOKENS = 150
20
  DEVICE = "cpu"
21
  PORT = int(os.getenv("PORT", 7860))
22
 
@@ -72,39 +72,41 @@ class ScriptGenerator:
72
  generator = ScriptGenerator()
73
 
74
  async def verify_api_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
75
- """Verify API key - but allow Hugging Face monitoring"""
76
- # Allow internal Hugging Face IPs without API key for health checks
77
- # This prevents the constant model generation from their monitoring
78
  if credentials.credentials != API_KEY:
79
- # Check if this is likely Hugging Face internal monitoring
80
- # (you can add more sophisticated checks here if needed)
81
  raise HTTPException(status_code=401, detail="Invalid API key")
82
  return True
83
 
84
- def is_huggingface_monitoring(request: Request) -> bool:
85
- """Check if request is from Hugging Face monitoring"""
86
- client_host = request.client.host
87
- # Hugging Face internal IP ranges
88
- hf_ips = ["10.16.", "10.20.", "10.24."]
89
- return any(client_host.startswith(ip) for ip in hf_ips)
90
-
91
  @asynccontextmanager
92
  async def lifespan(app: FastAPI):
93
- # Load model but don't block startup
94
- # Model will load on first real request
95
  logger.info("πŸš€ API Server starting up...")
96
  yield
97
 
98
  app = FastAPI(lifespan=lifespan)
99
 
100
  def extract_topic(topic_input: Union[str, List[str]]) -> str:
 
101
  if isinstance(topic_input, list):
102
  if topic_input:
103
  return str(topic_input[0])
104
  return "No topic provided"
105
  return str(topic_input)
106
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  def generate_script(topic: str) -> str:
 
108
  try:
109
  if not generator.loaded:
110
  if not generator.load_model():
@@ -114,15 +116,30 @@ def generate_script(topic: str) -> str:
114
  logger.info(f"🎯 Generating script for: '{clean_topic}'")
115
 
116
  prompt = (
117
- f"Create a 60-second video script about: {clean_topic[:50]}\n\n"
118
- "1) Hook (10s)\n2) Content (40s)\n3) CTA (10s)\n\nScript:"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  )
120
 
121
  inputs = generator.tokenizer(
122
  prompt,
123
  return_tensors="pt",
124
  truncation=True,
125
- max_length=256
126
  )
127
 
128
  inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
@@ -133,52 +150,64 @@ def generate_script(topic: str) -> str:
133
  max_new_tokens=MAX_TOKENS,
134
  do_sample=True,
135
  top_p=0.9,
136
- temperature=0.7,
137
  pad_token_id=generator.tokenizer.eos_token_id,
 
138
  )
139
 
140
  script = generator.tokenizer.decode(outputs[0], skip_special_tokens=True)
141
  clean_script = script.replace(prompt, "").strip()
142
 
143
- if not clean_script:
144
- clean_script = "Script generation completed but returned empty content."
145
-
146
- logger.info(f"πŸ“ Generated {len(clean_script)} characters")
147
- return clean_script
148
 
149
  except Exception as e:
150
  logger.error(f"❌ Script generation failed: {str(e)}")
151
  raise
152
 
153
  async def process_job(job_id: str, topic_input: Union[str, List[str]], callback_url: str = None):
 
154
  try:
155
  topic = extract_topic(topic_input)
156
  logger.info(f"🎯 Processing: '{topic}'")
157
 
158
  script = generate_script(topic)
 
 
159
  jobs[job_id] = {
160
  "status": "complete",
161
  "result": script,
162
  "topic": topic,
163
- "script_length": len(script)
 
164
  }
165
 
166
  logger.info(f"βœ… Completed job {job_id}")
167
 
 
168
  if callback_url:
169
  try:
170
  async with httpx.AsyncClient(timeout=30.0) as client:
 
 
 
 
 
 
 
 
 
171
  response = await client.post(
172
  callback_url,
173
- json={
174
- "job_id": job_id,
175
- "status": "complete",
176
- "result": script,
177
- "topic": topic
178
- },
179
  headers={"Content-Type": "application/json"}
180
  )
 
181
  logger.info(f"πŸ“¨ Webhook status: {response.status_code}")
 
182
  except Exception as e:
183
  logger.error(f"❌ Webhook failed: {str(e)}")
184
 
@@ -186,11 +215,28 @@ async def process_job(job_id: str, topic_input: Union[str, List[str]], callback_
186
  error_msg = f"Job failed: {str(e)}"
187
  logger.error(f"❌ Job {job_id} failed: {error_msg}")
188
 
 
189
  jobs[job_id] = {
190
  "status": "failed",
191
  "error": error_msg,
192
  "topic": extract_topic(topic_input) if topic_input else "unknown"
193
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
 
195
  @app.post("/api/submit")
196
  async def submit_job(
@@ -198,11 +244,12 @@ async def submit_job(
198
  background_tasks: BackgroundTasks,
199
  auth: bool = Depends(verify_api_key)
200
  ):
201
- """Main endpoint for script generation"""
202
  try:
203
  data = await request.json()
204
  job_id = str(uuid.uuid4())
205
 
 
206
  if not data.get("topic"):
207
  raise HTTPException(status_code=400, detail="Topic is required")
208
 
@@ -212,12 +259,14 @@ async def submit_job(
212
 
213
  logger.info(f"πŸ“₯ Received job {job_id}: '{topic}'")
214
 
 
215
  jobs[job_id] = {
216
  "status": "processing",
217
  "callback_url": callback_url,
218
  "topic": topic
219
  }
220
 
 
221
  background_tasks.add_task(
222
  process_job,
223
  job_id,
@@ -228,7 +277,9 @@ async def submit_job(
228
  return JSONResponse({
229
  "job_id": job_id,
230
  "status": "queued",
231
- "topic": topic
 
 
232
  })
233
 
234
  except Exception as e:
@@ -243,26 +294,29 @@ async def get_status(job_id: str, auth: bool = Depends(verify_api_key)):
243
  return jobs[job_id]
244
 
245
  @app.get("/health")
246
- async def health_check(request: Request):
247
- """Health check endpoint - lightweight for monitoring"""
248
- # Return immediate response without model loading for monitoring
 
 
249
  return {
250
  "status": "healthy",
251
  "model_loaded": generator.loaded,
252
  "total_jobs": len(jobs),
253
- "monitoring": is_huggingface_monitoring(request)
 
 
254
  }
255
 
256
  @app.get("/test/generation")
257
- async def test_generation(request: Request, auth: bool = Depends(verify_api_key)):
258
- """Test endpoint - only works with API key"""
259
- # This won't be triggered by HF monitoring because it requires API key
260
  try:
261
  if not generator.loaded:
262
  if not generator.load_model():
263
  return {"status": "error", "error": "Model failed to load"}
264
 
265
- test_topic = "healthy lifestyle"
266
  logger.info(f"πŸ§ͺ Testing generation with: {test_topic}")
267
 
268
  script = generate_script(test_topic)
@@ -271,16 +325,29 @@ async def test_generation(request: Request, auth: bool = Depends(verify_api_key)
271
  "status": "success",
272
  "topic": test_topic,
273
  "script_length": len(script),
274
- "script_preview": script[:200] + "..." if len(script) > 200 else script
 
 
275
  }
276
 
277
  except Exception as e:
278
  logger.error(f"❌ Test generation failed: {str(e)}")
279
  return {"status": "error", "error": str(e)}
280
 
281
- # Remove public debug endpoints that were causing the issue
282
- # @app.get("/debug/jobs") - REMOVED
283
- # @app.get("/test/model") - REMOVED
 
 
 
 
 
 
 
 
 
 
 
284
 
285
  if __name__ == "__main__":
286
  uvicorn.run(
 
3
  import httpx
4
  import torch
5
  import logging
6
+ import re
7
  from typing import Dict, Optional, List, Union
8
  from fastapi import FastAPI, Request, BackgroundTasks, HTTPException, Depends
9
  from fastapi.responses import JSONResponse
 
16
  MODEL_ID = "google/gemma-1.1-2b-it"
17
  HF_TOKEN = os.getenv("HF_TOKEN", "")
18
  API_KEY = os.getenv("API_KEY", "default-key-123")
19
+ MAX_TOKENS = 450
20
  DEVICE = "cpu"
21
  PORT = int(os.getenv("PORT", 7860))
22
 
 
72
  generator = ScriptGenerator()
73
 
74
  async def verify_api_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
75
+ """Verify API key"""
 
 
76
  if credentials.credentials != API_KEY:
 
 
77
  raise HTTPException(status_code=401, detail="Invalid API key")
78
  return True
79
 
 
 
 
 
 
 
 
80
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: log startup, then hand control to FastAPI."""
    # Startup phase — the model is loaded lazily on first request, so
    # there is nothing to preload here.
    logger.info("πŸš€ API Server starting up...")
    yield
    # Shutdown phase — no resources to release.
84
 
85
  app = FastAPI(lifespan=lifespan)
86
 
87
def extract_topic(topic_input: Union[str, List[str]]) -> str:
    """Extract topic from string or array input.

    Non-list inputs are stringified as-is; for a list, the first element
    is used, and an empty list yields a placeholder message.
    """
    if not isinstance(topic_input, list):
        return str(topic_input)
    return str(topic_input[0]) if topic_input else "No topic provided"
94
 
95
def format_script(script: str) -> str:
    """Clean and format the generated script"""
    # Keep only the text after the last "SCRIPT:" marker, dropping any
    # echoed prompt text that precedes it.
    body = script.split("SCRIPT:")[-1].strip()

    # Start a new line before each timestamp marker such as "[0:15".
    body = re.sub(r'(\[\d+:\d+)', r'\n\1', body)

    # Collapse runs of blank lines down to a single blank line.
    body = re.sub(r'\n\s*\n', '\n\n', body)

    return body.strip()
107
+
108
  def generate_script(topic: str) -> str:
109
+ """Generate high-quality video script"""
110
  try:
111
  if not generator.loaded:
112
  if not generator.load_model():
 
116
  logger.info(f"🎯 Generating script for: '{clean_topic}'")
117
 
118
  prompt = (
119
+ f"Create a detailed 60-second YouTube/TikTok video script about: {clean_topic}\n\n"
120
+ "REQUIREMENTS:\n"
121
+ "- Total duration: 60 seconds exactly\n"
122
+ "- Engaging hook in first 5 seconds\n"
123
+ "- Clear structure with timestamps every 10-15 seconds\n"
124
+ "- Conversational, engaging tone for social media\n"
125
+ "- End with strong call-to-action\n"
126
+ "- Include both voiceover and visual descriptions\n"
127
+ "- Minimum 800 characters for proper 60-second video\n\n"
128
+ "SCRIPT FORMAT:\n"
129
+ "[0:00-0:05] HOOK: Grab attention immediately\n"
130
+ "[0:05-0:15] INTRODUCTION: Introduce topic and yourself\n"
131
+ "[0:15-0:45] MAIN CONTENT: 2-3 key points with examples\n"
132
+ "[0:45-0:55] BENEFIT: Why this matters to viewers\n"
133
+ "[0:55-1:00] CTA: Clear call to action (follow, comment, like)\n\n"
134
+ "Include both VOICEOVER and VISUAL descriptions.\n\n"
135
+ "SCRIPT:"
136
  )
137
 
138
  inputs = generator.tokenizer(
139
  prompt,
140
  return_tensors="pt",
141
  truncation=True,
142
+ max_length=512
143
  )
144
 
145
  inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
 
150
  max_new_tokens=MAX_TOKENS,
151
  do_sample=True,
152
  top_p=0.9,
153
+ temperature=0.8,
154
  pad_token_id=generator.tokenizer.eos_token_id,
155
+ repetition_penalty=1.1
156
  )
157
 
158
  script = generator.tokenizer.decode(outputs[0], skip_special_tokens=True)
159
  clean_script = script.replace(prompt, "").strip()
160
 
161
+ # Format the script
162
+ formatted_script = format_script(clean_script)
163
+
164
+ logger.info(f"πŸ“ Generated {len(formatted_script)} characters")
165
+ return formatted_script
166
 
167
  except Exception as e:
168
  logger.error(f"❌ Script generation failed: {str(e)}")
169
  raise
170
 
171
  async def process_job(job_id: str, topic_input: Union[str, List[str]], callback_url: str = None):
172
+ """Background task to process job"""
173
  try:
174
  topic = extract_topic(topic_input)
175
  logger.info(f"🎯 Processing: '{topic}'")
176
 
177
  script = generate_script(topic)
178
+
179
+ # Store job results
180
  jobs[job_id] = {
181
  "status": "complete",
182
  "result": script,
183
  "topic": topic,
184
+ "script_length": len(script),
185
+ "formatted": True
186
  }
187
 
188
  logger.info(f"βœ… Completed job {job_id}")
189
 
190
+ # Send webhook callback if URL provided
191
  if callback_url:
192
  try:
193
  async with httpx.AsyncClient(timeout=30.0) as client:
194
+ webhook_data = {
195
+ "job_id": job_id,
196
+ "status": "complete",
197
+ "result": script,
198
+ "topic": topic,
199
+ "script_length": len(script),
200
+ "formatted": True
201
+ }
202
+
203
  response = await client.post(
204
  callback_url,
205
+ json=webhook_data,
 
 
 
 
 
206
  headers={"Content-Type": "application/json"}
207
  )
208
+
209
  logger.info(f"πŸ“¨ Webhook status: {response.status_code}")
210
+
211
  except Exception as e:
212
  logger.error(f"❌ Webhook failed: {str(e)}")
213
 
 
215
  error_msg = f"Job failed: {str(e)}"
216
  logger.error(f"❌ Job {job_id} failed: {error_msg}")
217
 
218
+ # Store failure information
219
  jobs[job_id] = {
220
  "status": "failed",
221
  "error": error_msg,
222
  "topic": extract_topic(topic_input) if topic_input else "unknown"
223
  }
224
+
225
+ # Send failure webhook if callback URL exists
226
+ if callback_url:
227
+ try:
228
+ async with httpx.AsyncClient(timeout=10.0) as client:
229
+ await client.post(
230
+ callback_url,
231
+ json={
232
+ "job_id": job_id,
233
+ "status": "failed",
234
+ "error": error_msg,
235
+ "topic": extract_topic(topic_input) if topic_input else "unknown"
236
+ }
237
+ )
238
+ except Exception:
239
+ logger.error("Failed to send error webhook")
240
 
241
  @app.post("/api/submit")
242
  async def submit_job(
 
244
  background_tasks: BackgroundTasks,
245
  auth: bool = Depends(verify_api_key)
246
  ):
247
+ """Endpoint to submit new job"""
248
  try:
249
  data = await request.json()
250
  job_id = str(uuid.uuid4())
251
 
252
+ # Validate input
253
  if not data.get("topic"):
254
  raise HTTPException(status_code=400, detail="Topic is required")
255
 
 
259
 
260
  logger.info(f"πŸ“₯ Received job {job_id}: '{topic}'")
261
 
262
+ # Store initial job data
263
  jobs[job_id] = {
264
  "status": "processing",
265
  "callback_url": callback_url,
266
  "topic": topic
267
  }
268
 
269
+ # Process job in background
270
  background_tasks.add_task(
271
  process_job,
272
  job_id,
 
277
  return JSONResponse({
278
  "job_id": job_id,
279
  "status": "queued",
280
+ "topic": topic,
281
+ "estimated_time": "70-90 seconds",
282
+ "message": "Script generation started"
283
  })
284
 
285
  except Exception as e:
 
294
  return jobs[job_id]
295
 
296
@app.get("/health")
async def health_check():
    """Health check endpoint"""
    # Gather per-status statistics from the in-memory job store.
    finished = [j for j in jobs.values() if j.get("status") == "complete"]
    failure_count = sum(1 for j in jobs.values() if j.get("status") == "failed")

    # Average script length over completed jobs; max(1, ...) guards the
    # division when no job has completed yet.
    total_chars = sum(j.get("script_length", 0) for j in finished)
    mean_length = total_chars / max(1, len(finished))

    return {
        "status": "healthy",
        "model_loaded": generator.loaded,
        "total_jobs": len(jobs),
        "completed_jobs": len(finished),
        "failed_jobs": failure_count,
        "average_script_length": round(mean_length, 2)
    }
310
 
311
  @app.get("/test/generation")
312
+ async def test_generation(auth: bool = Depends(verify_api_key)):
313
+ """Test script generation"""
 
314
  try:
315
  if not generator.loaded:
316
  if not generator.load_model():
317
  return {"status": "error", "error": "Model failed to load"}
318
 
319
+ test_topic = "the future of artificial intelligence in healthcare"
320
  logger.info(f"πŸ§ͺ Testing generation with: {test_topic}")
321
 
322
  script = generate_script(test_topic)
 
325
  "status": "success",
326
  "topic": test_topic,
327
  "script_length": len(script),
328
+ "script_preview": script[:300] + "..." if len(script) > 300 else script,
329
+ "estimated_duration": "60 seconds",
330
+ "quality": "good" if len(script) >= 800 else "needs improvement"
331
  }
332
 
333
  except Exception as e:
334
  logger.error(f"❌ Test generation failed: {str(e)}")
335
  return {"status": "error", "error": str(e)}
336
 
337
@app.get("/")
async def root():
    """Root endpoint"""
    # Static service descriptor listing the available routes.
    available_endpoints = {
        "submit_job": "POST /api/submit",
        "check_status": "GET /api/status/{job_id}",
        "health": "GET /health",
        "test": "GET /test/generation"
    }
    return {
        "message": "Video Script Generator API",
        "version": "1.0",
        "endpoints": available_endpoints,
        "status": "operational"
    }
351
 
352
  if __name__ == "__main__":
353
  uvicorn.run(