# testspace/app.py — Hugging Face Space source (author: rui3000, commit f17dc57)
# FILE 1: minimal_service.py (same as Step 1)
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Lazily-initialized module-level singletons shared by the tokenizer helper
# and the GPU generation function; both stay None until first use.
_model = None
_tokenizer = None
# Hub id of the checkpoint to serve.
_model_name = "microsoft/DialoGPT-small"
def initialize_tokenizer():
    """Create the shared tokenizer on first call and reuse it afterwards."""
    global _tokenizer
    if _tokenizer is not None:
        return _tokenizer
    print("[MinimalService] Loading tokenizer...")
    _tokenizer = AutoTokenizer.from_pretrained(_model_name)
    # DialoGPT ships without a pad token; fall back to EOS so padding works.
    if _tokenizer.pad_token is None:
        _tokenizer.pad_token = _tokenizer.eos_token
    print("[MinimalService] Tokenizer loaded successfully.")
    return _tokenizer
@spaces.GPU
def generate_text_gpu(prompt: str, max_tokens: int = 50):
    """Generate a continuation of *prompt* on the GPU.

    The model is instantiated lazily inside this function because, on
    ZeroGPU Spaces, CUDA is only attached while a @spaces.GPU function runs.

    Args:
        prompt: Input text to continue.
        max_tokens: Maximum number of new tokens to sample.

    Returns:
        Decoded text (prompt plus continuation, special tokens stripped).
    """
    global _model, _tokenizer
    print("[MinimalService] GPU function called")

    if _tokenizer is None:
        initialize_tokenizer()

    if _model is None:
        print("[MinimalService] Loading model...")
        _model = AutoModelForCausalLM.from_pretrained(
            _model_name,
            torch_dtype=torch.float16,
            device_map="auto",
        )
        print("[MinimalService] Model loaded.")

    # FIX: tokenize with __call__ and pass attention_mask explicitly.
    # pad_token == eos_token for this model (set in initialize_tokenizer),
    # so generate() cannot infer the mask from the ids alone and warns /
    # may treat trailing content as padding.
    encoded = _tokenizer(prompt, return_tensors="pt")
    device = next(_model.parameters()).device
    input_ids = encoded["input_ids"].to(device)
    attention_mask = encoded["attention_mask"].to(device)

    with torch.no_grad():
        outputs = _model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_new_tokens=max_tokens,
            temperature=0.7,
            do_sample=True,
            pad_token_id=_tokenizer.eos_token_id,
        )
    response = _tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response
class MinimalService:
    """Thin facade over the module-level GPU generation function."""

    def __init__(self):
        # Warm the tokenizer eagerly; the model itself is loaded lazily
        # inside the GPU context by generate_text_gpu.
        print("[MinimalService] Service initialized")
        initialize_tokenizer()

    def generate(self, prompt: str):
        """Public method to generate text"""
        return generate_text_gpu(prompt)
# Module-level singleton consumed by the app layer.
service = MinimalService()
# Confirm the @spaces.GPU-decorated function is importable after module load.
print(f"[MinimalService] GPU function available: {generate_text_gpu.__name__}")
# ====================================
# FILE 2: app.py (Step 2 - with FastAPI)
import gradio as gr
import spaces
# Import the service
from minimal_service import service, generate_text_gpu
# Additional GPU function at app level
@spaces.GPU
def app_gpu_test():
    """GPU smoke-test function registered at the app level."""
    return "App GPU function works"
# Log that both GPU entry points are visible after import.
print("[App] GPU functions imported successfully")
print(f"[App] Service GPU function: {generate_text_gpu.__name__}")
print(f"[App] App GPU function: {app_gpu_test.__name__}")
# ADD FASTAPI - Step 2 change
from fastapi import FastAPI
from fastapi.responses import RedirectResponse
def generate_response(user_input):
    """Run the service on *user_input*, mapping failures to an error string."""
    if not user_input.strip():
        return "Please enter some text!"
    try:
        generated = service.generate(user_input)
    except Exception as e:
        return f"Error: {str(e)}"
    return f"Generated: {generated}"
# Gradio UI: one input textbox, one output textbox, a button wired to
# generate_response.
with gr.Blocks(title="Step 2: FastAPI Test") as demo:
    gr.Markdown("# Step 2: Testing FastAPI + GPU")
    gr.Markdown("Testing if adding FastAPI breaks GPU detection.")

    with gr.Row():
        input_text = gr.Textbox(
            label="Enter text",
            placeholder="Type something...",
            value="Hello, how are you?",
        )
        output_text = gr.Textbox(label="Generated response", interactive=False)

    generate_btn = gr.Button("Generate", variant="primary")
    generate_btn.click(
        fn=generate_response,
        inputs=[input_text],
        outputs=[output_text],
    )
# Mount the Gradio UI under /gradio on a FastAPI app; the bare root path
# redirects there so the Space URL lands on the UI.
app = FastAPI()


@app.get("/")
async def root():
    return RedirectResponse(url="/gradio")


app = gr.mount_gradio_app(app, demo, path="/gradio")
print("[App] FastAPI + Gradio setup completed")
if __name__ == "__main__":
    print("[App] Starting application...")
    import uvicorn

    # Serve the combined FastAPI + Gradio app on the standard Spaces port.
    uvicorn.run(app, host="0.0.0.0", port=7860)