Spaces:

rui3000
/

testspace

Runtime error

App Files Files Community

rui3000 committed on Jun 12, 2025

Commit

f17dc57

verified ·

1 Parent(s): fc269b4

Update app.py

Browse files

Files changed (1) hide show

app.py +82 -5

app.py CHANGED Viewed

@@ -1,10 +1,87 @@
 import gradio as gr
 import spaces
-# Import the service - this should trigger GPU function registration
 from minimal_service import service, generate_text_gpu
-# Additional GPU function at app level for extra safety
 @spaces.GPU
 def app_gpu_test():
     """Test GPU function at app level"""
@@ -30,8 +107,8 @@ def generate_response(user_input):
         return f"Error: {str(e)}"
 # Create Gradio interface
-with gr.Blocks(title="Minimal GPU Test with FastAPI") as demo:
-    gr.Markdown("# Minimal GPU Test with FastAPI")
     gr.Markdown("Testing if adding FastAPI breaks GPU detection.")
     with gr.Row():
@@ -53,7 +130,7 @@ with gr.Blocks(title="Minimal GPU Test with FastAPI") as demo:
         outputs=[output_text]
     )
-# ADD FASTAPI MOUNTING - Step 2 change
 app = FastAPI()
 @app.get("/")

+# FILE 1: minimal_service.py (same as Step 1)
+import spaces
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+# Global variables
+# Module-level caches: both start as None and are filled in on first use
+# (_tokenizer by initialize_tokenizer, _model inside generate_text_gpu).
+_model = None
+_tokenizer = None
+# Model identifier passed to both from_pretrained calls below.
+_model_name = "microsoft/DialoGPT-small"
+# Load the tokenizer for _model_name into the module-level _tokenizer cache
+# on the first call and return it; later calls return the cached instance
+# without reloading.
+def initialize_tokenizer():
+    """Initialize tokenizer"""
+    global _tokenizer
+    if _tokenizer is None:
+        print("[MinimalService] Loading tokenizer...")
+        _tokenizer = AutoTokenizer.from_pretrained(_model_name)
+        # Reuse the EOS token as the pad token when the tokenizer defines none.
+        if _tokenizer.pad_token is None:
+            _tokenizer.pad_token = _tokenizer.eos_token
+        print("[MinimalService] Tokenizer loaded successfully.")
+    return _tokenizer
+# Generate a sampled continuation of `prompt` and return the decoded text.
+#   prompt:     text that is encoded and passed to the model's generate().
+#   max_tokens: forwarded to generate() as max_new_tokens (default 50).
+# The model and tokenizer are cached in module globals, so only the first
+# call pays the loading cost. Output is sampled (do_sample=True), so it is
+# not deterministic.
+# NOTE(review): the decoded string presumably contains the prompt followed
+# by the generated text, since outputs[0] is decoded as a whole - confirm.
+@spaces.GPU
+def generate_text_gpu(prompt: str, max_tokens: int = 50):
+    """GPU function for text generation"""
+    global _model, _tokenizer
+    print("[MinimalService] GPU function called")
+    # Initialize tokenizer
+    if _tokenizer is None:
+        initialize_tokenizer()
+    # Load model in GPU context
+    # Loading happens lazily here, inside the @spaces.GPU-decorated function,
+    # in half precision with device placement delegated to device_map="auto".
+    if _model is None:
+        print("[MinimalService] Loading model...")
+        _model = AutoModelForCausalLM.from_pretrained(
+            _model_name,
+            torch_dtype=torch.float16,
+            device_map="auto"
+        )
+        print("[MinimalService] Model loaded.")
+    # Simple generation
+    inputs = _tokenizer.encode(prompt, return_tensors="pt")
+    # Move the input ids to the device holding the model's first parameter.
+    device = next(_model.parameters()).device
+    inputs = inputs.to(device)
+    # Inference only: disable gradient tracking during generation.
+    with torch.no_grad():
+        outputs = _model.generate(
+            inputs,
+            max_new_tokens=max_tokens,
+            temperature=0.7,
+            do_sample=True,
+            pad_token_id=_tokenizer.eos_token_id
+        )
+    # Decode only the first returned sequence, dropping special tokens.
+    response = _tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return response
+# Thin wrapper around the module-level generation function.
+class MinimalService:
+    # Constructing the service prints a message and loads the tokenizer
+    # via initialize_tokenizer(); the model itself is not loaded here.
+    def __init__(self):
+        print("[MinimalService] Service initialized")
+        initialize_tokenizer()
+    # Delegates to generate_text_gpu with its default max_tokens (50).
+    def generate(self, prompt: str):
+        """Public method to generate text"""
+        return generate_text_gpu(prompt)
+# Create instance
+# Runs at module import time, so the constructor's side effects (a print
+# and the tokenizer load) happen on import.
+service = MinimalService()
+# Print confirmation
+# Prints the __name__ of generate_text_gpu as it exists after decoration.
+print(f"[MinimalService] GPU function available: {generate_text_gpu.__name__}")
+# ====================================
+# FILE 2: app.py (Step 2 - with FastAPI)
 import gradio as gr
 import spaces
+# Import the service
 from minimal_service import service, generate_text_gpu
+# Additional GPU function at app level
 @spaces.GPU
 def app_gpu_test():
     """Test GPU function at app level"""
         return f"Error: {str(e)}"
 # Create Gradio interface
+with gr.Blocks(title="Step 2: FastAPI Test") as demo:
+    gr.Markdown("# Step 2: Testing FastAPI + GPU")
     gr.Markdown("Testing if adding FastAPI breaks GPU detection.")
     with gr.Row():
         outputs=[output_text]
     )
+# ADD FASTAPI MOUNTING
 app = FastAPI()
 @app.get("/")