# Spaces: Configuration error
import os
import threading

import gradio as gr
from fastapi import FastAPI
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# Hub identifier of the model to serve; override with the MODEL_NAME env var.
MODEL = os.environ.get("MODEL_NAME", "google/gemma-4-E2B")

# Lazily populated by load_model(); both stay None until the first load.
tokenizer = None
generator = None

# Held by load_model() while it checks and populates the two globals above.
_model_lock = threading.Lock()
def load_model():
    """Load the tokenizer and text-generation pipeline for ``MODEL`` on CPU.

    Populates the module-level ``tokenizer`` and ``generator`` globals. The
    whole load runs under ``_model_lock``; if both globals are already set
    the function returns immediately without loading anything.

    The globals are assigned only after every loading step has succeeded, so
    an exception from any ``from_pretrained``/``pipeline`` call propagates to
    the caller and leaves both globals untouched for a later retry.

    Returns:
        None.
    """
    global tokenizer, generator
    with _model_lock:
        # Checked under the lock so a caller that waited on another
        # thread's load does not load a second time.
        if tokenizer is not None and generator is not None:
            return

        # NOTE: trust_remote_code=True lets transformers execute Python code
        # shipped in the model repository; only point MODEL at trusted repos.
        loaded_tokenizer = AutoTokenizer.from_pretrained(
            MODEL, trust_remote_code=True
        )
        model = AutoModelForCausalLM.from_pretrained(
            MODEL,
            device_map={"": "cpu"},
            torch_dtype="float32",
            low_cpu_mem_usage=True,
            trust_remote_code=True,
        )
        # No `device` argument here: placement is already fixed by the
        # device_map above, and some transformers releases reject a `device`
        # for a model that was loaded with a device_map.
        loaded_generator = pipeline(
            "text-generation", model=model, tokenizer=loaded_tokenizer
        )

        # Publish last, tokenizer first, so a reader that sees a non-None
        # generator also sees the tokenizer.
        tokenizer = loaded_tokenizer
        generator = loaded_generator
def generate(prompt):
    """Return the pipeline's generated text for ``prompt``.

    Calls ``load_model()`` first if the module-level ``generator`` has not
    been created yet, then runs greedy decoding (``do_sample=False``) for at
    most 64 new tokens.

    Args:
        prompt: Text passed straight to the text-generation pipeline.

    Returns:
        The ``"generated_text"`` field of the first pipeline result.
    """
    if generator is None:
        load_model()
    results = generator(prompt, max_new_tokens=64, do_sample=False)
    return results[0]["generated_text"]
# Gradio UI: a four-line prompt box wired to generate(), plain-text output.
demo = gr.Interface(
    fn=generate,
    inputs=gr.Textbox(lines=4, label="Prompt"),
    outputs="text",
    title="Gemma-4-E2B (CPU)",
)

# Mount the Gradio UI at the root path of a FastAPI application.
app = FastAPI()
app = gr.mount_gradio_app(app, demo, path="/")