yukee1992 committed on
Commit
04a5c1f
·
verified ·
1 Parent(s): c56dee0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -44
app.py CHANGED
@@ -2,8 +2,8 @@ import os
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
  import gradio as gr
5
- from fastapi import FastAPI
6
- from typing import Dict, Any
7
 
8
  # Configuration
9
  MODEL_ID = "google/gemma-1.1-2b-it"
@@ -11,7 +11,6 @@ HF_TOKEN = os.getenv("HF_TOKEN", "")
11
  MAX_TOKENS = 80
12
  MAX_INPUT_LENGTH = 100
13
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
14
- PORT = int(os.getenv("PORT", 7860)) # Default port with override
15
 
16
  class ScriptGenerator:
17
  def __init__(self):
@@ -20,31 +19,29 @@ class ScriptGenerator:
20
  self.loaded = False
21
 
22
  def load_model(self):
23
- """Safe model loading with progress tracking"""
24
  if self.loaded:
25
  return
26
 
27
  print("🔄 Loading model...")
28
  try:
29
  self.tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
30
-
31
  self.model = AutoModelForCausalLM.from_pretrained(
32
  MODEL_ID,
33
  torch_dtype=torch.float32 if DEVICE == "cpu" else torch.float16,
34
  device_map="auto" if DEVICE == "cuda" else None,
35
  token=HF_TOKEN
36
  ).to(DEVICE)
37
-
38
  self.loaded = True
39
- print("✅ Model loaded successfully!")
40
  except Exception as e:
41
- print(f"❌ Model loading failed: {str(e)}")
42
  raise
43
 
44
  generator = ScriptGenerator()
45
 
46
  def predict(topic: str) -> str:
47
- """Generate script with proper error handling"""
48
  try:
49
  if not topic or len(topic) > MAX_INPUT_LENGTH:
50
  return f"Topic must be 1-{MAX_INPUT_LENGTH} characters"
@@ -62,52 +59,44 @@ def predict(topic: str) -> str:
62
  temperature=0.7,
63
  do_sample=True
64
  )
65
-
66
  return generator.tokenizer.decode(outputs[0], skip_special_tokens=True)
67
 
68
- except torch.cuda.OutOfMemoryError:
69
- return "Error: GPU out of memory - try a shorter input"
70
  except Exception as e:
71
  return f"Error: {str(e)}"
72
 
73
- # Create Gradio interface
74
- interface = gr.Interface(
75
- fn=predict,
76
- inputs=gr.Textbox(label="Topic", placeholder="Enter your topic..."),
77
- outputs=gr.Textbox(label="Generated Script", lines=5),
78
- title="Gemma Script Generator",
79
- allow_flagging="never"
80
- )
81
-
82
- # Create FastAPI app
83
  app = FastAPI()
84
 
85
  # Add API endpoint
86
  @app.post("/api/predict")
87
- async def api_predict(topic: str):
88
- return {
89
- "success": True,
90
- "result": predict(topic),
91
- "error": None
92
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
- # Mount Gradio interface
95
  app = gr.mount_gradio_app(app, interface, path="/")
96
 
97
- # Launch configuration
98
  if __name__ == "__main__":
99
  generator.load_model()
100
-
101
- # Disable Gradio's internal port scanning
102
- os.environ["GRADIO_SERVER_PORT"] = str(PORT)
103
- os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
104
-
105
- interface.launch(
106
- server_name="0.0.0.0",
107
- server_port=PORT,
108
- share=False,
109
- prevent_thread_lock=True, # Required for Hugging Face Spaces
110
- show_error=True,
111
- debug=False, # Disable debug mode to prevent port scanning
112
- ssl_verify=False # Disable SSL verification for internal calls
113
- )
 
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
  import gradio as gr
5
+ from fastapi import FastAPI, Request
6
+ from fastapi.responses import JSONResponse
7
 
8
  # Configuration
9
  MODEL_ID = "google/gemma-1.1-2b-it"
 
11
  MAX_TOKENS = 80
12
  MAX_INPUT_LENGTH = 100
13
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
14
 
15
  class ScriptGenerator:
16
  def __init__(self):
 
19
  self.loaded = False
20
 
21
  def load_model(self):
22
+ """Safe model loading"""
23
  if self.loaded:
24
  return
25
 
26
  print("🔄 Loading model...")
27
  try:
28
  self.tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
 
29
  self.model = AutoModelForCausalLM.from_pretrained(
30
  MODEL_ID,
31
  torch_dtype=torch.float32 if DEVICE == "cpu" else torch.float16,
32
  device_map="auto" if DEVICE == "cuda" else None,
33
  token=HF_TOKEN
34
  ).to(DEVICE)
 
35
  self.loaded = True
36
+ print("✅ Model loaded!")
37
  except Exception as e:
38
+ print(f"❌ Loading failed: {str(e)}")
39
  raise
40
 
41
  generator = ScriptGenerator()
42
 
43
  def predict(topic: str) -> str:
44
+ """Generate script with error handling"""
45
  try:
46
  if not topic or len(topic) > MAX_INPUT_LENGTH:
47
  return f"Topic must be 1-{MAX_INPUT_LENGTH} characters"
 
59
  temperature=0.7,
60
  do_sample=True
61
  )
 
62
  return generator.tokenizer.decode(outputs[0], skip_special_tokens=True)
63
 
 
 
64
  except Exception as e:
65
  return f"Error: {str(e)}"
66
 
67
+ # Create FastAPI app first
 
 
 
 
 
 
 
 
 
68
  app = FastAPI()
69
 
70
  # Add API endpoint
71
@app.post("/api/predict")
async def api_predict(request: Request):
    """POST /api/predict — expects a JSON body like {"topic": str}.

    Returns a uniform envelope: {"success", "result", "error"}. Any failure
    while reading/parsing the body yields a 500 with the error message.
    """
    try:
        body = await request.json()
        result = predict(body.get("topic", ""))
        envelope = {"success": True, "result": result, "error": None}
        return JSONResponse(envelope)
    except Exception as exc:
        failure = {"success": False, "result": None, "error": str(exc)}
        return JSONResponse(failure, status_code=500)
87
+
88
# Gradio UI that drives the same predict() used by the JSON endpoint
interface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(label="Topic"),
    outputs=gr.Textbox(label="Script", lines=5),
    title="Gemma Script Generator",
)

# Serve the Gradio UI at the root path of the FastAPI app
app = gr.mount_gradio_app(app, interface, path="/")

if __name__ == "__main__":
    generator.load_model()
    import uvicorn

    # PORT env var overrides the Spaces default of 7860
    serve_port = int(os.getenv("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=serve_port)