"""Streamlit app that turns an uploaded image into a narrated short story.

Pipeline: image -> caption (BLIP) -> story (TinyLlama) -> speech (MMS-TTS).
"""

# import part
import contextlib
import hashlib
import os
import tempfile
import wave

import numpy as np
import streamlit as st
from transformers import pipeline

# Model identifiers and limits used by the three stages.
CAPTION_MODEL = "sooh-j/blip-image-captioning-base"
STORY_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
TTS_MODEL = "facebook/mms-tts-eng"

MAX_STORY_WORDS = 100  # stories are truncated to at most this many words
MAX_TTS_CHARS = 500  # keep TTS input short to avoid timeouts
UPLOAD_DIR = "temp"


# function part
@st.cache_resource
def _load_pipeline(task, model):
    """Build a transformers pipeline once and reuse it across reruns.

    Streamlit re-executes the script on every widget interaction; without
    caching, each rerun would reload every model from scratch.
    """
    return pipeline(task, model=model)


# img2text
def img2text(image_path):
    """Return a caption for the image stored at ``image_path``."""
    image_to_text = _load_pipeline("image-to-text", CAPTION_MODEL)
    return image_to_text(image_path)[0]["generated_text"]


# text2story
def text2story(text):
    """Generate a short children's story seeded by ``text``.

    The result starts with "Once upon a time, " and is truncated to at most
    ``MAX_STORY_WORDS`` words.
    """
    # Using a smaller text generation model
    generator = _load_pipeline("text-generation", STORY_MODEL)

    # Create a prompt for the story generation
    prompt = (
        f"Write a fun children's story based on this: {text}. "
        "Once upon a time, "
    )

    # Generate the story
    story_result = generator(
        prompt,
        max_length=150,
        num_return_sequences=1,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        do_sample=True,
    )

    # The generated text echoes the prompt; keep only the story opening.
    story_text = story_result[0]["generated_text"]
    story_text = story_text.replace(prompt, "Once upon a time, ")

    # Cap the story length (this truncates; it does not pad short stories).
    words = story_text.split()
    if len(words) > MAX_STORY_WORDS:
        story_text = " ".join(words[:MAX_STORY_WORDS])

    return story_text


def _trim_for_tts(story_text, max_chars=MAX_TTS_CHARS):
    """Shorten ``story_text`` to ``max_chars``, ending on a sentence if possible."""
    if len(story_text) <= max_chars:
        return story_text
    head = story_text[:max_chars]
    last_period = head.rfind(".")
    if last_period > 0:
        return head[: last_period + 1]
    return head


def _write_wav(path, audio, sampling_rate):
    """Write a float waveform to ``path`` as 16-bit PCM WAV.

    ``audio`` is expected to be a float array in [-1, 1], either 1-D or
    shaped (channels, samples) as documented for the transformers
    text-to-audio pipeline. Singleton dimensions are squeezed away.
    """
    samples = np.atleast_1d(np.squeeze(np.asarray(audio, dtype=np.float32)))
    if samples.ndim == 1:
        channels = 1
    elif samples.ndim == 2:
        channels = samples.shape[0]
        samples = samples.T  # WAV frames interleave channels per sample
    else:
        raise ValueError(f"Unsupported audio shape: {samples.shape}")

    # Little-endian int16 is what the WAV container expects for sampwidth=2.
    pcm = (np.clip(samples, -1.0, 1.0) * 32767.0).astype("<i2")

    with wave.open(path, "wb") as wav_file:
        wav_file.setnchannels(channels)
        wav_file.setsampwidth(2)
        wav_file.setframerate(int(sampling_rate))
        wav_file.writeframes(pcm.tobytes())


# text2audio
def text2audio(story_text):
    """Synthesize ``story_text`` to speech and return the path of a WAV file.

    The caller owns the returned temporary file and should delete it when
    done. On any failure the error is shown in the UI and ``None`` is
    returned.
    """
    try:
        # Use a simple, reliable TTS model
        synthesizer = _load_pipeline("text-to-speech", TTS_MODEL)

        # Generate speech; the pipeline output holds the waveform under
        # "audio" and its rate under "sampling_rate" (there is no "bytes").
        speech = synthesizer(_trim_for_tts(story_text))

        # Reserve a .wav path, then close the handle so it can be rewritten.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
            temp_filename = temp_file.name

        try:
            _write_wav(temp_filename, speech["audio"], speech["sampling_rate"])
        except Exception:
            # Do not leave a half-written temp file behind.
            with contextlib.suppress(OSError):
                os.remove(temp_filename)
            raise

        return temp_filename
    except Exception as e:  # UI boundary: report instead of crashing the app
        st.error(f"Error generating audio: {str(e)}")
        return None


# Function to save temporary image file
def save_uploaded_image(uploaded_file):
    """Persist the uploaded file under ``UPLOAD_DIR`` and return its path."""
    os.makedirs(UPLOAD_DIR, exist_ok=True)

    # The name is client-supplied: drop any directory components so the
    # file cannot be written outside UPLOAD_DIR.
    safe_name = os.path.basename(uploaded_file.name.replace("\\", "/")) or "upload"
    image_path = os.path.join(UPLOAD_DIR, safe_name)

    with open(image_path, "wb") as f:
        f.write(uploaded_file.getvalue())
    return image_path


def _caption_upload(uploaded_file):
    """Caption the upload, always removing the temporary image afterwards."""
    image_path = save_uploaded_image(uploaded_file)
    try:
        return img2text(image_path)
    finally:
        with contextlib.suppress(OSError):
            os.remove(image_path)


def _story_audio_bytes(story):
    """Return WAV bytes for ``story``, or ``None`` if synthesis failed."""
    audio_file = text2audio(story)
    if not audio_file or not os.path.exists(audio_file):
        return None
    try:
        with open(audio_file, "rb") as f:
            return f.read()
    finally:
        # The bytes are held in memory, so the temp file is no longer needed.
        with contextlib.suppress(OSError):
            os.remove(audio_file)


def _results_for(uploaded_file):
    """Return the per-upload results dict kept in the Streamlit session.

    Results are keyed by a hash of the file contents, so reruns triggered by
    widgets reuse the same caption/story/audio instead of regenerating them.
    """
    upload_key = hashlib.sha256(uploaded_file.getvalue()).hexdigest()
    if st.session_state.get("upload_key") != upload_key:
        st.session_state["upload_key"] = upload_key
        st.session_state["results"] = {}
    return st.session_state["results"]


# main part
def main():
    """Render the app: upload, caption, story, and audio playback."""
    st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
    st.header("Turn Your Image to Audio Story")
    uploaded_file = st.file_uploader("Select an Image...")

    if uploaded_file is None:
        return

    # Display the uploaded image
    st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)

    results = _results_for(uploaded_file)

    # Stage 1: Image to Text
    st.text('Processing img2text...')
    if "caption" not in results:
        results["caption"] = _caption_upload(uploaded_file)
    st.write(results["caption"])

    # Stage 2: Text to Story
    st.text('Generating a story...')
    if "story" not in results:
        results["story"] = text2story(results["caption"])
    st.write(results["story"])

    # Stage 3: Story to Audio data
    st.text('Generating audio data...')
    audio_bytes = results.get("audio")
    if audio_bytes is None:
        audio_bytes = _story_audio_bytes(results["story"])
        # Only cache successes so a later rerun can retry a failed synthesis.
        if audio_bytes is not None:
            results["audio"] = audio_bytes

    # Play button
    if st.button("Play Audio"):
        if audio_bytes:
            st.audio(audio_bytes, format="audio/wav")
        else:
            st.error("Audio generation failed. Please try again.")


if __name__ == "__main__":
    main()