"""AssistAI: a small Gradio demo with text chat, image captioning and voice input.

Three Hugging Face pipelines are loaded once at import time and exposed
through a tabbed Gradio ``Blocks`` UI bound to the module-level name ``demo``.
"""

import gradio as gr
from transformers import pipeline

# Pipelines are built once at import time so every request reuses the loaded
# weights. device=-1 is the transformers convention for "run on CPU".
# NOTE(review): the published Hub repo appears to be "Qwen/Qwen1.5-0.5B-Chat"
# (no hyphen after "Qwen") -- confirm this id resolves before deploying.
chat_model = pipeline(
    "text-generation",
    model="Qwen/Qwen-1.5-0.5B-Chat",
    device=-1,
)
caption_model = pipeline(
    "image-to-text",
    model="Salesforce/blip-image-captioning-base",
    device=-1,
)
speech_model = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-tiny",
    device=-1,
)

# Page-level styling; "#chatbox" targets the chat input via its elem_id.
APP_CSS = """
body {
    background: linear-gradient(135deg, #ff9a9e, #fad0c4 30%, #fad0c4 70%, #fbc2eb);
    font-family: 'Comic Sans MS', cursive;
}
#chatbox {height:200px}
"""


def process_text(user_input):
    """Generate a reply to *user_input* with the chat model.

    Args:
        user_input: Prompt text typed by the user (may be ``None`` or blank
            when the button is clicked on an empty textbox).

    Returns:
        The generated continuation only, or a short hint when there is no
        input to respond to.
    """
    if not user_input or not user_input.strip():
        return "Please type a message first."

    result = chat_model(
        user_input,
        max_new_tokens=128,
        do_sample=True,
        temperature=0.7,
        # Without this the pipeline returns prompt + continuation, so the
        # "reply" would start by repeating the user's own message.
        return_full_text=False,
    )
    return result[0]["generated_text"].strip()


def process_image(image):
    """Return a decorated caption for *image*.

    Args:
        image: The value supplied by the ``gr.Image(type="pil")`` input, or
            ``None`` when nothing has been uploaded.

    Returns:
        The caption produced by the captioning pipeline wrapped in sparkle
        markers, or a short hint when no image was provided.
    """
    if image is None:
        return "Please upload an image first."

    result = caption_model(image)
    return "✨ I see... " + result[0]["generated_text"] + " ✨"


def process_audio(audio_path):
    """Transcribe a recording and answer it with the chat model.

    Args:
        audio_path: Path supplied by the ``gr.Audio(type="filepath")`` input,
            or ``None`` when nothing has been recorded.

    Returns:
        A string containing the transcript followed by the assistant reply,
        or a short hint when there is nothing to transcribe.
    """
    if not audio_path:
        return "Please record something first."

    transcript = speech_model(audio_path)["text"].strip()
    if not transcript:
        return "Sorry, I could not hear anything in that recording."

    reply = process_text(transcript)
    return f"🎤 You said: **{transcript}**\n\n🤖 Assistant: {reply}"


with gr.Blocks(theme="soft", css=APP_CSS) as demo:
    gr.HTML("\n\n🌈✨ AssistAI — Your Magical Multimodal Buddy ✨🌈\n\n")
    gr.Markdown("Welcome! Try text, images, or your voice and see the magic 🧙‍♂️\n---")

    with gr.Tab("💬 Chat Genie"):
        txt_in = gr.Textbox(
            label="Ask me anything 🧞",
            lines=3,
            placeholder="E.g. Tell me a riddle",
            elem_id="chatbox",
        )
        btn_chat = gr.Button("✨ Generate Reply ✨")
        txt_out = gr.Textbox(label="Genie Responds", lines=8)
        examples = gr.Examples(
            examples=[
                "Tell me a riddle",
                "Summarize Harry Potter in one sentence",
                "What's faster: a cheetah or WiFi?",
                "Write me a pirate poem about coding ⛵💻",
            ],
            inputs=[txt_in],
        )
        btn_chat.click(process_text, inputs=txt_in, outputs=txt_out)

    with gr.Tab("🖼️ Image Magic"):
        img_in = gr.Image(type="pil", label="Upload image 🖼️")
        btn_img = gr.Button("🪄 Generate Caption")
        img_out = gr.Textbox(label="AI's magical description ✨", lines=5)
        btn_img.click(process_image, inputs=img_in, outputs=img_out)

    with gr.Tab("🎤 Voice Spell"):
        aud_in = gr.Audio(
            sources=["microphone"],
            type="filepath",
            label="Speak your spell 🎤",
        )
        btn_voice = gr.Button("🔮 Transcribe + Reply")
        aud_out = gr.Textbox(label="Transcript + Genie reply", lines=7)
        btn_voice.click(process_audio, inputs=aud_in, outputs=aud_out)


# Only start the server when run as a script, so importing this module
# (e.g. to reuse ``demo`` or the handlers) does not block on launch().
if __name__ == "__main__":
    demo.launch()