import gradio as gr
from transformers import pipeline
# Model pipelines: text generation, image captioning, speech recognition.
# Each pipeline is created once at import time and reused by the handlers.
# device=-1 selects the CPU for every pipeline.
# NOTE(review): the Qwen1.5 chat checkpoints are, as far as I know, published
# as "Qwen/Qwen1.5-0.5B-Chat" (no hyphen after "Qwen") — confirm this id
# resolves on the Hub.
chat_model = pipeline(
    task="text-generation",
    model="Qwen/Qwen-1.5-0.5B-Chat",
    device=-1,
)
caption_model = pipeline(
    task="image-to-text",
    model="Salesforce/blip-image-captioning-base",
    device=-1,
)
speech_model = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-tiny",
    device=-1,
)
# Handler functions invoked by the Gradio button click events.
def process_text(user_input):
    """Generate a chat reply for *user_input* with the text-generation pipeline.

    Args:
        user_input: Prompt text typed by the user (or a speech transcript).

    Returns:
        The newly generated text only (the prompt is not echoed back), or a
        short hint when the prompt is missing or whitespace-only.
    """
    prompt = (user_input or "").strip()
    if not prompt:
        # Nothing to generate from; skip the model call entirely.
        return "Please type a message first."
    result = chat_model(
        prompt,
        max_new_tokens=128,
        do_sample=True,
        temperature=0.7,
        # The pipeline returns prompt + completion by default; request the
        # completion alone so the reply does not repeat the user's question.
        return_full_text=False,
    )
    return result[0]["generated_text"].strip()
def process_image(image):
    """Caption *image* with the image-to-text pipeline.

    Args:
        image: The uploaded image, or ``None`` when nothing was uploaded.

    Returns:
        The generated caption wrapped in the app's decorative text, or a
        short hint when no image was provided.
    """
    if image is None:
        # The button can be clicked before any image is uploaded.
        return "Please upload an image first."
    result = caption_model(image)
    # The sparkle emoji were mis-encoded in the original literal; restored.
    return "✨ I see... " + result[0]["generated_text"] + " ✨"
def process_audio(audio_path):
    """Transcribe a recording and answer the transcript with the chat model.

    Args:
        audio_path: File path of the recorded audio, or ``None``/empty when
            nothing was recorded.

    Returns:
        A string containing the transcript followed by the assistant's
        reply, or a short hint when there is no audio or no recognised speech.
    """
    if not audio_path:
        # The button can be clicked before anything has been recorded.
        return "Please record something first."
    transcript = speech_model(audio_path)["text"].strip()
    if not transcript:
        # Silence or unintelligible audio: there is nothing to reply to.
        return "I couldn't hear any speech. Please try again."
    reply = process_text(transcript)
    # NOTE(review): both emoji were mis-encoded to the same bytes in the
    # original literal; microphone / robot are the presumed intended glyphs.
    return f"🎤 You said: **{transcript}**\n\n🤖 Assistant: {reply}"
# Gradio UI: three tabs (text chat, image captioning, voice), each wiring one
# button to the matching handler function.
# NOTE(review): the emoji in these labels were mis-encoded in the original
# file and have been restored. The star in the title banner, the genie in the
# chat textbox label and the wand on the caption button are best guesses
# (their distinguishing bytes were lost) — confirm the intended glyphs.
with gr.Blocks(theme="soft", css="""
body {
background: linear-gradient(135deg, #ff9a9e, #fad0c4 30%, #fad0c4 70%, #fbc2eb);
font-family: 'Comic Sans MS', cursive;
}
#chatbox {height:200px}
""") as demo:
    # The banner spans several lines, so it must be a triple-quoted string;
    # the original single-quoted form was a syntax error.
    gr.HTML("""
🌟✨ AssistAI — Your Magical Multimodal Buddy ✨🌟
""")
    gr.Markdown("Welcome! Try text, images, or your voice and see the magic 🧙‍♂️\n---")

    with gr.Tab("💬 Chat Genie"):
        # elem_id ties this textbox to the "#chatbox" CSS rule above.
        txt_in = gr.Textbox(
            label="Ask me anything 🧞",
            lines=3,
            placeholder="E.g. Tell me a riddle",
            elem_id="chatbox",
        )
        btn_chat = gr.Button("✨ Generate Reply ✨")
        txt_out = gr.Textbox(label="Genie Responds", lines=8)
        examples = gr.Examples(
            examples=[
                "Tell me a riddle",
                "Summarize Harry Potter in one sentence",
                "What's faster: a cheetah or WiFi?",
                "Write me a pirate poem about coding ⛵💻",
            ],
            inputs=[txt_in],
        )
        btn_chat.click(process_text, inputs=txt_in, outputs=txt_out)

    with gr.Tab("🖼️ Image Magic"):
        # type="pil" hands the upload to process_image as a PIL image.
        img_in = gr.Image(type="pil", label="Upload image 🖼️")
        btn_img = gr.Button("🪄 Generate Caption")
        img_out = gr.Textbox(label="AI's magical description ✨", lines=5)
        btn_img.click(process_image, inputs=img_in, outputs=img_out)

    with gr.Tab("🎤 Voice Spell"):
        # type="filepath" hands the recording to process_audio as a file path.
        aud_in = gr.Audio(
            sources=["microphone"],
            type="filepath",
            label="Speak your spell 🎤",
        )
        btn_voice = gr.Button("🔮 Transcribe + Reply")
        aud_out = gr.Textbox(label="Transcript + Genie reply", lines=7)
        btn_voice.click(process_audio, inputs=aud_in, outputs=aud_out)

demo.launch()