"""Simple multimodal chatbot: text + optional image, answered via Groq's LLaMA-4 Scout."""

import base64
import io
import os

import gradio as gr
from groq import Groq
from PIL import Image

# Groq-hosted model that accepts interleaved text and image content.
MODEL_ID = "meta-llama/llama-4-scout-17b-16e-instruct"

# Fail fast with an actionable message instead of a bare KeyError from
# os.environ["GROQ_API_KEY"] when the variable is missing or empty.
_api_key = os.environ.get("GROQ_API_KEY")
if not _api_key:
    raise RuntimeError("Set the GROQ_API_KEY environment variable before launching.")

# Initialize Groq client
client = Groq(api_key=_api_key)


def image_to_data_url(pil_image):
    """Encode a PIL image as a base64 ``data:image/jpeg`` URL.

    Args:
        pil_image: A ``PIL.Image.Image`` in any mode.

    Returns:
        A ``data:image/jpeg;base64,...`` string suitable for an ``image_url``
        content part in the chat API.
    """
    # JPEG has no alpha channel: saving an RGBA/P-mode image (e.g. a PNG
    # upload with transparency) raises OSError, so normalize to RGB first.
    if pil_image.mode != "RGB":
        pil_image = pil_image.convert("RGB")
    buffered = io.BytesIO()
    pil_image.save(buffered, format="JPEG")
    encoded_image = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return f"data:image/jpeg;base64,{encoded_image}"


def multimodal_chat(user_text, user_image):
    """Send the user's text (and optional image) to the model; return the reply.

    Args:
        user_text: The user's message.
        user_image: Optional ``PIL.Image.Image`` from the Gradio image input,
            or ``None`` when no image was uploaded.

    Returns:
        The assistant's complete reply as a string.
    """
    content = [{"type": "text", "text": user_text}]
    # Identity check, not truthiness: we only care whether an image was uploaded.
    if user_image is not None:
        content.append({
            "type": "image_url",
            "image_url": {"url": image_to_data_url(user_image)},
        })

    # Call Groq multimodal chat
    stream = client.chat.completions.create(
        model=MODEL_ID,
        messages=[{"role": "user", "content": content}],
        temperature=1,
        max_completion_tokens=1024,
        top_p=1,
        stream=True,
    )

    # Accumulate the streamed chunks into one string; delta.content may be
    # None on control chunks, hence the `or ""` guard. (To show tokens live
    # in the UI, this function could `yield reply` inside the loop instead.)
    reply = ""
    for chunk in stream:
        reply += chunk.choices[0].delta.content or ""
    return reply


# Gradio Interface
demo = gr.Interface(
    fn=multimodal_chat,
    inputs=[
        gr.Textbox(label="Your Message"),
        gr.Image(type="pil", label="Upload Image (optional)"),
    ],
    outputs=gr.Textbox(label="Assistant Reply", lines=10),
    title="🧠 Simple Multimodal Chatbot",
    description="Type a question and optionally upload an image. Powered by Groq + LLaMA-4 Scout.",
)

if __name__ == "__main__":
    demo.launch()