File size: 2,654 Bytes
1163594
21667ef
 
2dbba12
b2cc3a3
21667ef
 
 
2dbba12
21667ef
 
 
 
 
 
6a2c4b8
21667ef
 
 
 
2dbba12
21667ef
6a2c4b8
 
2dbba12
 
6a2c4b8
2dbba12
6a2c4b8
21667ef
2dbba12
 
6a2c4b8
 
2dbba12
 
 
6a2c4b8
2dbba12
 
 
 
 
 
6a2c4b8
 
2dbba12
21667ef
6a2c4b8
2dbba12
 
6a2c4b8
2dbba12
21667ef
6a2c4b8
2dbba12
 
 
 
21667ef
2dbba12
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import gradio as gr
from transformers import pipeline

# Inference pipelines, created once at import time so every request reuses
# the already-loaded weights. device=-1 selects CPU for each pipeline.
# NOTE: the chat checkpoint is published as "Qwen/Qwen1.5-0.5B-Chat"
# (no hyphen between "Qwen" and "1.5").
chat_model = pipeline("text-generation", model="Qwen/Qwen1.5-0.5B-Chat", device=-1)
caption_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base", device=-1)
speech_model = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device=-1)

# Request handlers: one per input modality (text, image, audio).
def process_text(user_input):
    """Generate a reply to *user_input* with the text-generation pipeline.

    Args:
        user_input: The prompt typed by the user (may be empty or None when
            the textbox was left blank).

    Returns:
        The newly generated text only (the prompt is not echoed back), or a
        short hint when there is nothing to respond to.
    """
    # Guard first: an empty prompt gives the model nothing to condition on.
    if not user_input or not user_input.strip():
        return "Please type a message first."

    result = chat_model(
        user_input,
        max_new_tokens=128,
        do_sample=True,
        temperature=0.7,
        # The pipeline default returns prompt + completion; the UI only
        # wants the completion.
        return_full_text=False,
    )
    return result[0]["generated_text"].strip()

def process_image(image):
    """Caption *image* with the image-to-text pipeline.

    Args:
        image: The uploaded image, or None when the user clicked the button
            without uploading anything.

    Returns:
        The caption wrapped in sparkle emoji, or a short hint when no image
        was supplied.
    """
    # The image component hands over None when it is empty; the pipeline
    # cannot process that.
    if image is None:
        return "Please upload an image first."

    result = caption_model(image)
    caption = result[0]["generated_text"]
    # Emoji restored from mis-decoded UTF-8 bytes in the original literal.
    return f"✨ I see... {caption} ✨"

def process_audio(audio_path):
    """Transcribe a recording and answer it with the chat model.

    Args:
        audio_path: Filesystem path of the recorded audio, or None/empty when
            nothing was recorded.

    Returns:
        A string containing the transcript followed by the assistant's reply,
        or a short hint when there is no audio or no recognizable speech.
    """
    if not audio_path:
        return "Please record something first."

    # Strip surrounding whitespace so the transcript sits cleanly inside
    # the ** ** markers below.
    transcript = speech_model(audio_path)["text"].strip()
    if not transcript:
        return "I couldn't hear any speech in that recording. Please try again."

    reply = process_text(transcript)
    # Emoji restored from mis-decoded UTF-8 bytes in the original literal.
    return f"🎤 You said: **{transcript}**\n\n🤖 Assistant: {reply}"

# UI definition: three tabs (text chat, image captioning, voice), each wiring
# one button to one handler. All emoji below were restored from mis-decoded
# UTF-8 bytes (mojibake) in the original literals.
with gr.Blocks(theme="soft", css="""
body {
    background: linear-gradient(135deg, #ff9a9e, #fad0c4 30%, #fad0c4 70%, #fbc2eb);
    font-family: 'Comic Sans MS', cursive;
}
#chatbox {height:200px}
""") as demo:

    gr.HTML("<h1 style='text-align:center; color:white;'>🌈✨ AssistAI — Your Magical Multimodal Buddy ✨🌈</h1>")
    # A blank line before '---' is required for a horizontal rule; directly
    # after a text line it would turn that line into a setext heading.
    gr.Markdown("Welcome! Try text, images, or your voice and see the magic 🧙‍♂️\n\n---")

    with gr.Tab("💬 Chat Genie"):
        txt_in = gr.Textbox(label="Ask me anything 🧞", lines=3, placeholder="E.g. Tell me a riddle", elem_id="chatbox")
        btn_chat = gr.Button("✨ Generate Reply ✨")
        txt_out = gr.Textbox(label="Genie Responds", lines=8)
        # Clickable sample prompts that populate the input textbox.
        gr.Examples(
            examples=[
                "Tell me a riddle",
                "Summarize Harry Potter in one sentence",
                "What's faster: a cheetah or WiFi?",
                "Write me a pirate poem about coding ⛵💻",
            ],
            inputs=[txt_in],
        )
        btn_chat.click(process_text, inputs=txt_in, outputs=txt_out)

    with gr.Tab("🖼️ Image Magic"):
        img_in = gr.Image(type="pil", label="Upload image 🖼️")
        btn_img = gr.Button("🪄 Generate Caption")
        img_out = gr.Textbox(label="AI's magical description ✨", lines=5)
        btn_img.click(process_image, inputs=img_in, outputs=img_out)

    with gr.Tab("🎤 Voice Spell"):
        # type="filepath" hands the handler a path on disk rather than raw samples.
        aud_in = gr.Audio(sources=["microphone"], type="filepath", label="Speak your spell 🎤")
        btn_voice = gr.Button("🔮 Transcribe + Reply")
        aud_out = gr.Textbox(label="Transcript + Genie reply", lines=7)
        btn_voice.click(process_audio, inputs=aud_in, outputs=aud_out)

# Start the web server (kept at module level, as in the original script).
demo.launch()