"""Gradio demo that exposes text generation from a Qwen3 causal language model."""

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the model and tokenizer once, at import time, so every request
# served by the UI reuses the same instances.
model_name = "Qwen/Qwen3-235B-A22B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")


def generate_text(prompt: str, max_length: int = 200, temperature: float = 0.7) -> str:
    """Generate a sampled continuation of *prompt* with the loaded model.

    Args:
        prompt: Text to continue. An empty or whitespace-only prompt
            short-circuits and returns an empty string.
        max_length: Upper bound on the number of newly generated tokens.
            It is forwarded as ``max_new_tokens`` so that the prompt's own
            length does not eat into the generation budget. Coerced to
            ``int`` because UI sliders may deliver floats.
        temperature: Sampling temperature forwarded to ``model.generate``.

    Returns:
        The first generated sequence decoded with special tokens removed.
        The sequence is decoded as returned by ``model.generate``, so for a
        causal LM it is expected to begin with the prompt text.
    """
    if not prompt or not prompt.strip():
        # Nothing to condition on; avoid sending an empty tensor to the model.
        return ""

    # Tokenize the input and move the tensors to the model's device.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Generate the response. Unpacking the whole encoding forwards the
    # attention mask together with the input ids.
    outputs = model.generate(
        **inputs,
        max_new_tokens=int(max_length),
        temperature=float(temperature),
        do_sample=True,
        top_p=0.9,
    )

    # Decode the response.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# Build the Gradio interface: controls in the left column, output on the right.
with gr.Blocks() as demo:
    gr.Markdown("# Qwen3-235B Demo")

    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(label="Il tuo prompt", lines=4)
            temperature = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.7, label="Temperatura"
            )
            max_length = gr.Slider(
                minimum=50, maximum=500, value=200, step=10, label="Lunghezza massima"
            )
            submit_btn = gr.Button("Genera")

        with gr.Column():
            output = gr.Textbox(label="Risposta generata", lines=8)

    # The input order must match generate_text's positional parameters.
    submit_btn.click(
        generate_text,
        inputs=[prompt_input, max_length, temperature],
        outputs=[output],
    )


# Launch the application only when run as a script.
if __name__ == "__main__":
    demo.launch()