"""Gradio demo: a small "fortune cookie" text generator backed by Phi-1.5.

Loading this module downloads/loads the model, builds the UI and starts the
Gradio server.
"""

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Phi-1.5 is a small LLM by current standards, but it is ~1.3B parameters,
# so expect a multi-gigabyte download on first run.
model_name = "microsoft/phi-1_5"

# Half precision only pays off on a GPU; on CPU float16 is slow and some
# operators are not implemented for it on older PyTorch builds.
_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=_dtype,
    device_map="auto",
)


def generate(prompt: str) -> str:
    """Sample a short continuation of *prompt* from the language model.

    Args:
        prompt: Text typed by the user.

    Returns:
        The decoded sequence produced by the model (special tokens removed).
        For a causal LM this is the prompt followed by up to 30 newly sampled
        tokens. An empty string is returned for a blank prompt.
    """
    # A blank prompt gives the model nothing to continue from; skip the call.
    if not prompt or not prompt.strip():
        return ""

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=30,
            temperature=0.7,
            do_sample=True,
            # Set explicitly so generate() does not have to fall back to EOS
            # (and log a warning) on every call.
            pad_token_id=tokenizer.eos_token_id,
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# UI components
emoji = "🔮"
title = "Tiny Fortune Cookie Generator"
description = "Uses Microsoft Phi-1.5 (1.3B params) to generate micro-wisdom"

with gr.Blocks(theme="soft") as demo:
    gr.Markdown(f"## {emoji} {title}\n{description}")

    # Prompt and result sit side by side.
    with gr.Row():
        prompt_input = gr.Textbox(
            value="What's my fortune today?",
            label="Input Prompt",
            placeholder="Ask your question...",
        )
        output_text = gr.Textbox(label="Your Fortune")

    generate_btn = gr.Button("Open Cookie 🍪", variant="primary")
    generate_btn.click(
        fn=generate,
        inputs=prompt_input,
        outputs=output_text,
    )

demo.launch()