import gradio as gr
from transformers import pipeline
# Pipelines (same as before, unchanged)
# All three pipelines are built once at import time with device=-1
# (the CPU setting in the transformers pipeline API).

# Text chat.
# NOTE(review): the Hub id for this model is usually written
# "Qwen/Qwen1.5-0.5B-Chat" -- confirm that the id below resolves.
chat_model = pipeline(
    task="text-generation",
    model="Qwen/Qwen-1.5-0.5B-Chat",
    device=-1,
)

# Image captioning.
caption_model = pipeline(
    task="image-to-text",
    model="Salesforce/blip-image-captioning-base",
    device=-1,
)

# Speech-to-text.
speech_model = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-tiny",
    device=-1,
)
# Functions (all safe and previously working)
def process_text(user_input):
    """Generate a reply to ``user_input`` with the text-generation pipeline.

    Args:
        user_input: Prompt text (typed by the user, or a transcript passed in
            by ``process_audio``).

    Returns:
        The newly generated text, or a short hint when the prompt is empty,
        blank, or ``None``.
    """
    # Do not run the model on an empty prompt.
    if not user_input or not user_input.strip():
        return "Please enter a message first."

    result = chat_model(
        user_input,
        max_new_tokens=128,
        do_sample=True,
        temperature=0.7,
        # By default the pipeline returns prompt + completion, which made the
        # reply start with an echo of the user's own text.
        return_full_text=False,
    )
    return result[0]["generated_text"]
def process_image(image):
    """Caption ``image`` and wrap the caption in the UI's decorative text.

    Args:
        image: The image handed over by the UI's image input, or ``None``
            when nothing has been uploaded.

    Returns:
        The decorated caption string, or a short hint when no image was
        provided.
    """
    # Without an upload there is nothing to caption; avoid passing None to
    # the pipeline.
    if image is None:
        return "Please upload an image first."

    result = caption_model(image)
    # NOTE(review): the decorative characters look mis-encoded in this copy
    # of the file -- confirm the source encoding before touching them.
    return "โจ I see... " + result[0]["generated_text"] + " โจ"
def process_audio(audio_path):
    """Transcribe a recording and generate a chat reply to the transcript.

    Args:
        audio_path: Path of the recorded audio file, or ``None``/empty when
            nothing has been recorded.

    Returns:
        A string containing the transcript followed by the reply from
        ``process_text``, or a short hint when there is no recording or no
        recognisable speech.
    """
    # No recording yet: avoid handing None to the speech pipeline.
    if not audio_path:
        return "Please record some audio first."

    transcript = speech_model(audio_path)["text"]

    # A blank transcript gives the chat model nothing to answer.
    if not transcript.strip():
        return "Sorry, I could not hear any speech in that recording."

    reply = process_text(transcript)
    # NOTE(review): the leading symbols look mis-encoded in this copy of the
    # file -- confirm the source encoding before touching them.
    return f"๐ค You said: **{transcript}**\n\n๐ค Assistant: {reply}"
# Build the three-tab UI. The CSS string is kept flush-left so its contents
# stay exactly as written.
with gr.Blocks(theme="soft", css="""
body {
background: linear-gradient(135deg, #ff9a9e, #fad0c4 30%, #fad0c4 70%, #fbc2eb);
font-family: 'Comic Sans MS', cursive;
}
#chatbox {height:200px}
""") as demo:
    gr.HTML("<h1 style='text-align:center; color:white;'>๐โจ AssistAI โ Your Magical Multimodal Buddy โจ๐</h1>")
    gr.Markdown("Welcome! Try text, images, or your voice and see the magic ๐งโโ๏ธ\n---")

    # Tab 1: free-text prompt -> process_text.
    with gr.Tab("๐ฌ Chat Genie"):
        # elem_id="chatbox" ties this box to the #chatbox rule in the CSS.
        txt_in = gr.Textbox(
            label="Ask me anything ๐ง",
            lines=3,
            placeholder="E.g. Tell me a riddle",
            elem_id="chatbox",
        )
        btn_chat = gr.Button("โจ Generate Reply โจ")
        txt_out = gr.Textbox(label="Genie Responds", lines=8)
        # Sample prompts bound to the input box.
        examples = gr.Examples(
            examples=[
                "Tell me a riddle",
                "Summarize Harry Potter in one sentence",
                "What's faster: a cheetah or WiFi?",
                "Write me a pirate poem about coding โต๐ป"
            ],
            inputs=[txt_in]
        )
        btn_chat.click(process_text, inputs=txt_in, outputs=txt_out)

    # Tab 2: uploaded image (delivered as a PIL object) -> process_image.
    with gr.Tab("๐ผ๏ธ Image Magic"):
        img_in = gr.Image(type="pil", label="Upload image ๐ผ๏ธ")
        btn_img = gr.Button("๐ช Generate Caption")
        img_out = gr.Textbox(label="AI's magical description โจ", lines=5)
        btn_img.click(process_image, inputs=img_in, outputs=img_out)

    # Tab 3: microphone recording (delivered as a file path) -> process_audio.
    with gr.Tab("๐ค Voice Spell"):
        aud_in = gr.Audio(sources=["microphone"], type="filepath", label="Speak your spell ๐ค")
        btn_voice = gr.Button("๐ฎ Transcribe + Reply")
        aud_out = gr.Textbox(label="Transcript + Genie reply", lines=7)
        btn_voice.click(process_audio, inputs=aud_in, outputs=aud_out)

# Start the app. The stray trailing "|" that followed this call was a syntax
# error and has been removed.
demo.launch()