Spaces:

yukee1992
/

gemma-1b-script-generatorV2

Sleeping

App Files Community

yukee1992 committed on Aug 19, 2025

Commit

aa364cd

verified ·

1 Parent(s): 386c1c8

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -71

app.py CHANGED Viewed

@@ -9,13 +9,12 @@ from fastapi.responses import JSONResponse
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import uvicorn
 from contextlib import asynccontextmanager
-from pydantic import BaseModel
 # Configuration
 MODEL_ID = "google/gemma-1.1-2b-it"
 HF_TOKEN = os.getenv("HF_TOKEN", "")
-MAX_TOKENS = 200  # Reduced for faster generation
-DEVICE = "cpu"
 PORT = int(os.getenv("PORT", 7860))
 # Setup logging
@@ -44,12 +43,11 @@ class ScriptGenerator:
             self.model = AutoModelForCausalLM.from_pretrained(
                 MODEL_ID,
                 torch_dtype=torch.float32,
-                device_map="auto",
                 token=HF_TOKEN,
                 low_cpu_mem_usage=True
             )
-            if DEVICE == "cuda":
-                self.model = self.model.cuda()
             self.loaded = True
             logger.info("✅ Model loaded successfully")
         except Exception as e:
@@ -69,17 +67,15 @@ def extract_topic(topic_input: Union[str, List[str]]) -> str:
     """Extract topic from string or array input"""
     if isinstance(topic_input, list):
         if topic_input:
-            return str(topic_input[0])  # Take first element if it's a list
         return "No topic provided"
     return str(topic_input)
 def generate_script(topic: str) -> str:
     """Generate script with error handling"""
     try:
-        # Clean the topic input
         clean_topic = topic.strip().strip("['").strip("']").strip('"').strip("'")
-        logger.info(f"🎯 Generating script for topic: '{clean_topic}'")
         prompt = (
             f"Create a short 1-minute video script about: {clean_topic[:80]}\n\n"
@@ -90,22 +86,13 @@ def generate_script(topic: str) -> str:
             "Script:"
         )
-        logger.info(f"📋 Prompt: {prompt[:100]}...")
-        # Tokenize with proper padding
         inputs = generator.tokenizer(
             prompt,
             return_tensors="pt",
             padding=True,
             truncation=True,
             max_length=512
-        )
-        # Move to device
-        if DEVICE == "cuda":
-            inputs = {k: v.cuda() for k, v in inputs.items()}
-        else:
-            inputs = {k: v for k, v in inputs.items()}
         # Generate with safer parameters
         with torch.no_grad():
@@ -115,17 +102,13 @@ def generate_script(topic: str) -> str:
                 do_sample=True,
                 top_p=0.8,
                 temperature=0.7,
-                pad_token_id=generator.tokenizer.eos_token_id,
-                repetition_penalty=1.1
             )
-        # Decode the output
         script = generator.tokenizer.decode(outputs[0], skip_special_tokens=True)
         clean_script = script.replace(prompt, "").strip()
-        logger.info(f"📝 Generated script: {clean_script[:100]}...")
-        logger.info(f"📏 Script length: {len(clean_script)} characters")
         return clean_script
     except Exception as e:
@@ -135,9 +118,8 @@ def generate_script(topic: str) -> str:
 async def process_job(job_id: str, topic_input: Union[str, List[str]], callback_url: str = None):
     """Background task to process job"""
     try:
-        # Extract and clean the topic
         topic = extract_topic(topic_input)
-        logger.info(f"🎯 Processing topic: '{topic}'")
         script = generate_script(topic)
         jobs[job_id] = {
@@ -148,20 +130,17 @@ async def process_job(job_id: str, topic_input: Union[str, List[str]], callback_
             "script_length": len(script)
         }
-        logger.info(f"✅ Completed job {job_id} - Script generated successfully")
         if callback_url:
             try:
-                logger.info(f"📤 Sending webhook to: {callback_url}")
                 async with httpx.AsyncClient(timeout=30.0) as client:
                     webhook_data = {
                         "job_id": job_id,
                         "status": "complete",
                         "result": script,
                         "topic": topic,
-                        "original_input": topic_input,
-                        "script_length": len(script)
                     }
                     response = await client.post(
@@ -170,17 +149,10 @@ async def process_job(job_id: str, topic_input: Union[str, List[str]], callback_
                         headers={"Content-Type": "application/json"}
                     )
-                    logger.info(f"📨 Webhook response status: {response.status_code}")
-                    if response.status_code == 200:
-                        logger.info(f"✅ Webhook delivered successfully to n8n")
-                    else:
-                        logger.error(f"❌ Webhook failed with status: {response.status_code}")
             except Exception as e:
                 logger.error(f"❌ Webhook failed: {str(e)}")
-        else:
-            logger.warning(f"⚠️ No callback URL provided for job {job_id}")
     except Exception as e:
         error_msg = f"Job failed: {str(e)}"
@@ -201,20 +173,14 @@ async def submit_job(request: Request, background_tasks: BackgroundTasks):
         data = await request.json()
         job_id = str(uuid.uuid4())
-        # Validate input
         if not data.get("topic"):
             raise HTTPException(status_code=400, detail="Topic is required")
         callback_url = data.get("callback_url")
         topic_input = data["topic"]
-        # Extract and log the topic
         topic = extract_topic(topic_input)
-        logger.info(f"📥 Received new job - ID: {job_id}")
-        logger.info(f"📝 Raw input: {topic_input}")
-        logger.info(f"🎯 Cleaned topic: '{topic}'")
-        logger.info(f"🔗 Callback URL: {callback_url or 'None'}")
         jobs[job_id] = {
             "status": "processing",
@@ -235,9 +201,7 @@ async def submit_job(request: Request, background_tasks: BackgroundTasks):
             "job_id": job_id,
             "status": "queued",
             "received_topic": topic,
-            "original_input": topic_input,
-            "callback_url": callback_url,
-            "message": "Job is being processed"
         })
     except Exception as e:
@@ -260,9 +224,7 @@ async def debug_jobs():
             job_id: {
                 "status": data["status"],
                 "topic": data.get("topic", "unknown"),
-                "original_input": data.get("original_input", "unknown"),
                 "script_length": data.get("script_length", 0),
-                "callback_url": data.get("callback_url"),
                 "error": data.get("error", "none")
             }
             for job_id, data in jobs.items()
@@ -275,32 +237,18 @@ async def health_check():
     return {
         "status": "healthy",
         "model_loaded": generator.loaded,
-        "total_jobs_processed": len(jobs),
-        "completed_jobs": sum(1 for job in jobs.values() if job.get("status") == "complete"),
-        "failed_jobs": sum(1 for job in jobs.values() if job.get("status") == "failed")
     }
 @app.get("/test/generation")
 async def test_generation():
-    """Test endpoint to verify script generation works"""
     try:
         test_topic = "healthy lifestyle tips"
-        logger.info(f"🧪 Testing generation with topic: {test_topic}")
         script = generate_script(test_topic)
-        return {
-            "status": "success",
-            "topic": test_topic,
-            "script": script,
-            "length": len(script)
-        }
     except Exception as e:
-        logger.error(f"❌ Test generation failed: {str(e)}", exc_info=True)
-        return {
-            "status": "error",
-            "error": str(e)
-        }
 if __name__ == "__main__":
     uvicorn.run(

 from transformers import AutoTokenizer, AutoModelForCausalLM
 import uvicorn
 from contextlib import asynccontextmanager
 # Configuration
 MODEL_ID = "google/gemma-1.1-2b-it"
 HF_TOKEN = os.getenv("HF_TOKEN", "")
+MAX_TOKENS = 200
+DEVICE = "cpu"  # Force CPU to avoid device_map issues
 PORT = int(os.getenv("PORT", 7860))
 # Setup logging
             self.model = AutoModelForCausalLM.from_pretrained(
                 MODEL_ID,
                 torch_dtype=torch.float32,
                 token=HF_TOKEN,
                 low_cpu_mem_usage=True
             )
+            # Simple device assignment without device_map
+            self.model = self.model.to(DEVICE)
             self.loaded = True
             logger.info("✅ Model loaded successfully")
         except Exception as e:
     """Extract topic from string or array input"""
     if isinstance(topic_input, list):
         if topic_input:
+            return str(topic_input[0])
         return "No topic provided"
     return str(topic_input)
 def generate_script(topic: str) -> str:
     """Generate script with error handling"""
     try:
         clean_topic = topic.strip().strip("['").strip("']").strip('"').strip("'")
+        logger.info(f"🎯 Generating script for: '{clean_topic}'")
         prompt = (
             f"Create a short 1-minute video script about: {clean_topic[:80]}\n\n"
             "Script:"
         )
         inputs = generator.tokenizer(
             prompt,
             return_tensors="pt",
             padding=True,
             truncation=True,
             max_length=512
+        ).to(DEVICE)
         # Generate with safer parameters
         with torch.no_grad():
                 do_sample=True,
                 top_p=0.8,
                 temperature=0.7,
+                pad_token_id=generator.tokenizer.eos_token_id
             )
         script = generator.tokenizer.decode(outputs[0], skip_special_tokens=True)
         clean_script = script.replace(prompt, "").strip()
+        logger.info(f"📝 Generated {len(clean_script)} characters")
         return clean_script
     except Exception as e:
 async def process_job(job_id: str, topic_input: Union[str, List[str]], callback_url: str = None):
     """Background task to process job"""
     try:
         topic = extract_topic(topic_input)
+        logger.info(f"🎯 Processing: '{topic}'")
         script = generate_script(topic)
         jobs[job_id] = {
             "script_length": len(script)
         }
+        logger.info(f"✅ Completed job {job_id}")
         if callback_url:
             try:
                 async with httpx.AsyncClient(timeout=30.0) as client:
                     webhook_data = {
                         "job_id": job_id,
                         "status": "complete",
                         "result": script,
                         "topic": topic,
+                        "original_input": topic_input
                     }
                     response = await client.post(
                         headers={"Content-Type": "application/json"}
                     )
+                    logger.info(f"📨 Webhook status: {response.status_code}")
             except Exception as e:
                 logger.error(f"❌ Webhook failed: {str(e)}")
     except Exception as e:
         error_msg = f"Job failed: {str(e)}"
         data = await request.json()
         job_id = str(uuid.uuid4())
         if not data.get("topic"):
             raise HTTPException(status_code=400, detail="Topic is required")
         callback_url = data.get("callback_url")
         topic_input = data["topic"]
         topic = extract_topic(topic_input)
+        logger.info(f"📥 Received job {job_id}: '{topic}'")
         jobs[job_id] = {
             "status": "processing",
             "job_id": job_id,
             "status": "queued",
             "received_topic": topic,
+            "callback_url": callback_url
         })
     except Exception as e:
             job_id: {
                 "status": data["status"],
                 "topic": data.get("topic", "unknown"),
                 "script_length": data.get("script_length", 0),
                 "error": data.get("error", "none")
             }
             for job_id, data in jobs.items()
     return {
         "status": "healthy",
         "model_loaded": generator.loaded,
+        "total_jobs": len(jobs)
     }
 @app.get("/test/generation")
 async def test_generation():
+    """Test script generation"""
     try:
         test_topic = "healthy lifestyle tips"
         script = generate_script(test_topic)
+        return {"status": "success", "topic": test_topic, "script": script}
     except Exception as e:
+        return {"status": "error", "error": str(e)}
 if __name__ == "__main__":
     uvicorn.run(