# Multimodal AI Assistant — Streamlit demo app (text chat, speech-to-text/TTS, image captioning)
import os
import tempfile

import streamlit as st
import whisper
from gtts import gTTS
from PIL import Image
from transformers import pipeline, BlipProcessor, BlipForConditionalGeneration
| # Initialize Models | |
| # 1. Text Model | |
| chat_pipeline = pipeline("text2text-generation", model="facebook/blenderbot-400M-distill") | |
| # 2. Image Model | |
| image_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base") | |
| image_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base") | |
| # 3. Voice Model | |
| voice_model = whisper.load_model("base") | |
| # Streamlit App | |
| st.title("Multimodal AI Assistant") | |
| st.write("Interact with AI via text, voice, and images!") | |
| # Text Input Section | |
| st.header("Text Interaction") | |
| user_text = st.text_input("Enter your query:") | |
| if st.button("Submit Text"): | |
| if user_text: | |
| response = chat_pipeline(user_text) | |
| st.success(f"Assistant: {response[0]['generated_text']}") | |
| # Voice Input Section | |
| st.header("Voice Interaction") | |
| uploaded_audio = st.file_uploader("Upload an audio file:", type=["wav", "mp3"]) | |
| if st.button("Submit Audio"): | |
| if uploaded_audio: | |
| # Save the uploaded audio to a temporary file | |
| with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file: | |
| temp_audio_file.write(uploaded_audio.read()) | |
| temp_audio_path = temp_audio_file.name | |
| # Transcribe audio to text | |
| transcribed_text = voice_model.transcribe(temp_audio_path)['text'] | |
| st.write(f"Transcribed Text: {transcribed_text}") | |
| # Generate AI response | |
| audio_response = chat_pipeline(transcribed_text) | |
| st.success(f"Assistant: {audio_response[0]['generated_text']}") | |
| # Convert response to speech | |
| tts = gTTS(audio_response[0]['generated_text']) | |
| tts_output_path = "response_audio.mp3" | |
| tts.save(tts_output_path) | |
| st.audio(tts_output_path) | |
| # Image Input Section | |
| st.header("Image Interaction") | |
| uploaded_image = st.file_uploader("Upload an image:", type=["jpg", "png", "jpeg"]) | |
| if st.button("Submit Image"): | |
| if uploaded_image: | |
| # Display uploaded image | |
| image = Image.open(uploaded_image) | |
| st.image(image, caption="Uploaded Image") | |
| # Generate caption | |
| inputs = image_processor(image, return_tensors="pt") | |
| outputs = image_model.generate(**inputs) | |
| caption = image_processor.decode(outputs[0], skip_special_tokens=True) | |
| st.success(f"Generated Caption: {caption}") | |