"""Gradio front end that sends a prompt to a hosted chat model and shows the reply.

The model is queried through ``huggingface_hub.InferenceClient`` using the
chat-completion (conversational) endpoint.
"""

import logging

import gradio as gr
from huggingface_hub import InferenceClient

logger = logging.getLogger(__name__)

MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"

# Use the conversational endpoint: one shared client for every request.
client = InferenceClient(MODEL_ID)


def generate(prompt: str, temperature: float = 0.8, max_tokens: int = 256) -> str:
    """Return the model's reply to *prompt*, or an ``"Error: ..."`` string.

    Args:
        prompt: User text sent as a single ``user`` chat message.
        temperature: Sampling temperature forwarded to the endpoint.
        max_tokens: Upper bound on generated tokens; coerced to ``int``
            because UI sliders may deliver the value as a float.

    Returns:
        The content of the first choice in the response. Failures are not
        raised: they are logged and reported as ``"Error: <message>"`` so the
        text can be displayed directly in the output box.
    """
    # Reject blank input locally instead of spending a remote call on it.
    if not prompt or not prompt.strip():
        return "Error: prompt is empty"

    try:
        response = client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            temperature=float(temperature),
            max_tokens=int(max_tokens),
        )
        content = response.choices[0].message.content
    except Exception as exc:  # UI boundary: show the failure instead of crashing
        logger.exception("chat_completion request failed")
        return f"Error: {exc}"

    # Normalise a missing message body so the function always returns a str.
    return content if content is not None else ""


with gr.Blocks(title="amkyaw-coder") as demo:
    gr.Markdown("# amkyaw-coder\n🤖 Code Generation Model (via HF Inference)")

    with gr.Row():
        # Left column: prompt and sampling controls.
        with gr.Column():
            prompt = gr.Textbox(
                label="Prompt",
                lines=4,
                placeholder="Enter your prompt here...",
            )
            temperature = gr.Slider(
                0.1, 2.0, value=0.8, step=0.1, label="Temperature"
            )
            max_tokens = gr.Slider(
                32, 512, value=128, step=32, label="Max Tokens"
            )
            submit = gr.Button("Generate", variant="primary")

        # Right column: generated text (or the error message).
        with gr.Column():
            output = gr.Textbox(label="Output", lines=15)

    submit.click(
        generate,
        inputs=[prompt, temperature, max_tokens],
        outputs=output,
    )


if __name__ == "__main__":
    # Listen on all interfaces, port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)