# Hugging Face Space: app.py (Space status at scrape time: Sleeping)
# app.py - small-model friendly Gradio
import os

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Checkpoint is overridable via the MODEL_ID environment variable.
MODEL_ID = os.environ.get("MODEL_ID", "Salesforce/codegen-6B-multi")
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load tokenizer and weights once at import time, then move the model to the
# chosen device and switch to inference mode.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)
model.to(DEVICE)
model.eval()
def generate(prompt, max_new_tokens=64, temperature=0.2):
    """Generate a model completion for *prompt* and strip the echoed prompt.

    Args:
        prompt: Source text fed to the model.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature; 0 selects greedy decoding.

    Returns:
        The generated continuation as a string (prompt prefix removed).
    """
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(DEVICE)

    gen_kwargs = {"max_new_tokens": int(max_new_tokens)}
    # BUG FIX: transformers ignores `temperature` unless do_sample=True (the
    # original always decoded greedily regardless of the slider), and rejects
    # temperature=0 when sampling — so treat 0 as explicit greedy decoding.
    temperature = float(temperature)
    if temperature > 0:
        gen_kwargs["do_sample"] = True
        gen_kwargs["temperature"] = temperature
    # GPT-style checkpoints (e.g. codegen) define no pad token; fall back to
    # EOS to avoid the per-call pad_token_id warning.
    if tokenizer.pad_token_id is None:
        gen_kwargs["pad_token_id"] = tokenizer.eos_token_id

    with torch.no_grad():
        out = model.generate(**inputs, **gen_kwargs)
    text = tokenizer.decode(out[0], skip_special_tokens=True)
    # Causal LMs echo the prompt; show only the continuation in the UI.
    if text.startswith(prompt):
        text = text[len(prompt):].lstrip()
    return text
# Build the Blocks UI: prompt in, generated code out, with generation knobs.
with gr.Blocks() as demo:
    gr.Markdown("## Code assistant")
    prompt = gr.Textbox(lines=6, label="Prompt")
    max_tokens = gr.Slider(16, 256, value=64, step=16, label="Max new tokens")
    temp = gr.Slider(0.0, 1.0, value=0.2, step=0.01, label="Temperature")
    out = gr.Textbox(lines=12, label="Output")
    run_button = gr.Button("Generate")
    # Wire the button to the generation function; slider values map
    # positionally onto generate(prompt, max_new_tokens, temperature).
    run_button.click(generate, inputs=[prompt, max_tokens, temp], outputs=[out])

# Bind to all interfaces on the platform-provided port (default 7860).
PORT = int(os.environ.get("PORT", 7860))
demo.launch(server_name="0.0.0.0", server_port=PORT)