Spaces:

nashjiwani
/

AssistAI

Sleeping

File size: 2,654 Bytes

1163594
21667ef
 
2dbba12
b2cc3a3
21667ef
 
 
2dbba12
21667ef
 
 
 
 
 
6a2c4b8
21667ef
 
 
 
2dbba12
21667ef
6a2c4b8
 
2dbba12
 
6a2c4b8
2dbba12
6a2c4b8
21667ef
2dbba12
 
6a2c4b8
 
2dbba12
 
 
6a2c4b8
2dbba12
 
 
 
 
 
6a2c4b8
 
2dbba12
21667ef
6a2c4b8
2dbba12
 
6a2c4b8
2dbba12
21667ef
6a2c4b8
2dbba12
 
 
 
21667ef
2dbba12

import gradio as gr
from transformers import pipeline

# Model pipelines, created once at import time and shared by every request.
# device=-1 asks transformers to run each pipeline on the CPU.
# NOTE: the Hub repo id is "Qwen/Qwen1.5-0.5B-Chat" (no hyphen between
# "Qwen" and "1.5"); the hyphenated spelling does not resolve to a model.
chat_model = pipeline("text-generation", model="Qwen/Qwen1.5-0.5B-Chat", device=-1)
caption_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base", device=-1)
speech_model = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device=-1)

# Request handlers: each one wraps a single pipeline call for a UI tab.
def process_text(user_input):
    """Generate a reply to ``user_input`` with the text-generation pipeline.

    Args:
        user_input: Prompt text from the chat box (or a speech transcript).
            May be ``None`` or blank when the user submits nothing.

    Returns:
        The newly generated text only (the prompt is not echoed back), or a
        short hint when no prompt was supplied.
    """
    # Guard: do not spend a model call on an empty prompt.
    if not user_input or not user_input.strip():
        return "Please type a message first."

    result = chat_model(
        user_input,
        max_new_tokens=128,
        do_sample=True,
        temperature=0.7,
        # By default the pipeline returns prompt + continuation; keep only
        # the continuation so the reply does not repeat the user's input.
        return_full_text=False,
    )
    return result[0]["generated_text"].strip()

def process_image(image):
    """Caption ``image`` with the image-to-text pipeline.

    Args:
        image: The uploaded image as delivered by the image input component,
            or ``None`` when the button is pressed before anything is uploaded.

    Returns:
        The generated caption wrapped in the app's decorative prefix/suffix,
        or a short hint when no image was supplied.
    """
    # Guard: the caption pipeline cannot handle a missing image.
    if image is None:
        return "Please upload an image first."

    result = caption_model(image)
    return "✨ I see... " + result[0]["generated_text"] + " ✨"

def process_audio(audio_path):
    """Transcribe a recording and answer it with ``process_text``.

    Args:
        audio_path: Path to the recorded audio file, or ``None``/empty when
            nothing was recorded.

    Returns:
        A string containing the transcript followed by the assistant's reply,
        or a short hint when there is no recording or no recognizable speech.
    """
    # Guard: without a recording there is nothing to hand to the ASR pipeline.
    if not audio_path:
        return "Please record some audio first."

    # Trim surrounding whitespace so the transcript sits cleanly inside the
    # ** ** markers below.
    transcript = speech_model(audio_path)["text"].strip()
    if not transcript:
        return "I couldn't hear any speech. Please try recording again."

    reply = process_text(transcript)
    return f"🎤 You said: **{transcript}**\n\n🤖 Assistant: {reply}"

# Page styling: gradient background and font for the body, plus a fixed
# height for the element tagged elem_id="chatbox" (the chat input below).
CUSTOM_CSS = """
body {
    background: linear-gradient(135deg, #ff9a9e, #fad0c4 30%, #fad0c4 70%, #fbc2eb);
    font-family: 'Comic Sans MS', cursive;
}
#chatbox {height:200px}
"""

# UI layout: a header followed by three tabs, each wiring one input component
# and a button to one of the handler functions defined above.
with gr.Blocks(theme="soft", css=CUSTOM_CSS) as demo:

    gr.HTML("<h1 style='text-align:center; color:white;'>🌈✨ AssistAI — Your Magical Multimodal Buddy ✨🌈</h1>")
    gr.Markdown("Welcome! Try text, images, or your voice and see the magic 🧙‍♂️\n---")

    # Tab 1: free-text prompt -> process_text.
    with gr.Tab("💬 Chat Genie"):
        txt_in = gr.Textbox(label="Ask me anything 🧞", lines=3, placeholder="E.g. Tell me a riddle", elem_id="chatbox")
        btn_chat = gr.Button("✨ Generate Reply ✨")
        txt_out = gr.Textbox(label="Genie Responds", lines=8)
        # Sample prompts that fill txt_in when selected.
        examples = gr.Examples(
            examples=[
                "Tell me a riddle",
                "Summarize Harry Potter in one sentence",
                "What's faster: a cheetah or WiFi?",
                "Write me a pirate poem about coding ⛵💻",
            ],
            inputs=[txt_in],
        )
        btn_chat.click(process_text, inputs=txt_in, outputs=txt_out)

    # Tab 2: uploaded image -> process_image.
    with gr.Tab("🖼️ Image Magic"):
        img_in = gr.Image(type="pil", label="Upload image 🖼️")
        btn_img = gr.Button("🪄 Generate Caption")
        img_out = gr.Textbox(label="AI's magical description ✨", lines=5)
        btn_img.click(process_image, inputs=img_in, outputs=img_out)

    # Tab 3: microphone recording (delivered as a file path) -> process_audio.
    with gr.Tab("🎤 Voice Spell"):
        aud_in = gr.Audio(sources=["microphone"], type="filepath", label="Speak your spell 🎤")
        btn_voice = gr.Button("🔮 Transcribe + Reply")
        aud_out = gr.Textbox(label="Transcript + Genie reply", lines=7)
        btn_voice.click(process_audio, inputs=aud_in, outputs=aud_out)

# Start the server only when executed as a script, so importing this module
# (e.g. to reuse `demo` or the handlers) does not launch the app.
if __name__ == "__main__":
    demo.launch()