File size: 2,223 Bytes
ad4186a
90bef38
8d5fabf
ab8ead3
118cd25
ad4186a
ab8ead3
ad4186a
 
cd245d5
8d5fabf
ad4186a
5f21a2d
ad4186a
 
5f21a2d
 
 
ad4186a
5f21a2d
 
 
 
 
 
 
 
 
ad4186a
cd245d5
ad4186a
 
 
8d5fabf
ad4186a
 
 
4e37056
ab8ead3
ad4186a
 
f006a50
ad4186a
ab8ead3
f006a50
ad4186a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7df9b81
ad4186a
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# Third-party imports: Streamlit for the UI, Transformers pipelines for the models, PIL for image loading
import streamlit as st
from transformers import pipeline
from PIL import Image

# Simple image-to-text function
def img2text(image):
    """Return a generated caption for ``image``.

    Args:
        image: The image to caption, passed straight to the Hugging Face
            ``image-to-text`` pipeline (the caller in this file passes a
            PIL image).

    Returns:
        The ``generated_text`` of the first caption the pipeline produces.
    """
    # Streamlit re-executes this script on every user interaction, so a
    # pipeline built directly here would reload the model weights each time.
    # st.cache_resource keys its cache on the wrapped function, so wrapping
    # the loader at call time still reuses a single pipeline across reruns.
    captioner = st.cache_resource(_load_captioner)()
    return captioner(image)[0]["generated_text"]


def _load_captioner():
    """Build the image-captioning pipeline (expensive: loads model weights)."""
    return pipeline("image-to-text", model="sooh-j/blip-image-captioning-base")

# Simple text-to-story function
def text2story(text):
    """Generate a short children's story seeded by ``text``.

    Args:
        text: A short description (the image caption in this app) that the
            story should be based on.

    Returns:
        The story as a string that begins with "Once upon a time, " followed
        by the model's sampled continuation.
    """
    story_opening = "Once upon a time, "
    prompt = f"Write a short children's story based on this: {text}. {story_opening}"

    # Streamlit re-executes this script on every user interaction; caching the
    # loader avoids reloading the model weights on each rerun.
    generator = st.cache_resource(_load_story_generator)()

    story_result = generator(
        prompt,
        # max_new_tokens budgets only the generated story; max_length would
        # also count the prompt, so longer captions would shorten the story.
        max_new_tokens=150,
        num_return_sequences=1,
        temperature=0.7,
        do_sample=True,
        # Ask for the continuation only, instead of stripping the echoed
        # prompt afterwards with a string replace.
        return_full_text=False,
    )

    return story_opening + story_result[0]["generated_text"]


def _load_story_generator():
    """Build the text-generation pipeline (expensive: loads model weights)."""
    return pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")

# Simple text-to-audio function
def text2audio(story_text):
    """Synthesize speech for ``story_text``.

    Args:
        story_text: The text to be spoken.

    Returns:
        Whatever the Hugging Face ``text-to-speech`` pipeline returns for the
        input; the caller in this file looks for an audio array and a
        ``sampling_rate`` entry in it.
    """
    # Streamlit re-executes this script on every user interaction; caching the
    # loader avoids reloading the TTS model on each rerun.
    synthesizer = st.cache_resource(_load_synthesizer)()
    return synthesizer(story_text)


def _load_synthesizer():
    """Build the text-to-speech pipeline (expensive: loads model weights)."""
    return pipeline("text-to-speech", model="HelpingAI/HelpingAI-TTS-v1")

# Basic Streamlit interface
# Page flow: upload an image -> caption it -> expand the caption into a
# story -> synthesize the story as speech and play it back.
st.title("Image to Audio Story")
uploaded_file = st.file_uploader("Upload an image")

if uploaded_file is not None:
    # Show the upload back to the user, then decode it for the caption model.
    st.image(uploaded_file, caption="Uploaded Image")
    image = Image.open(uploaded_file)

    # Step 1: caption the image.
    st.write("Generating caption...")
    caption = img2text(image)
    st.write(f"Caption: {caption}")

    # Step 2: turn the caption into a story.
    st.write("Creating story...")
    story = text2story(caption)
    st.write(f"Story: {story}")

    # Step 3: synthesize the story.
    st.write("Generating audio...")
    speech_output = text2audio(story)

    # Step 4: play the result. The samples may be stored under either
    # 'audio' or 'audio_array'; 'audio' wins when both are present, and a
    # sampling rate is required in either case.
    try:
        audio_key = next(
            (key for key in ('audio', 'audio_array') if key in speech_output),
            None,
        )
        if audio_key is not None and 'sampling_rate' in speech_output:
            st.audio(
                speech_output[audio_key],
                sample_rate=speech_output['sampling_rate'],
            )
        else:
            st.write("Audio generated but could not be played.")
    except Exception as e:
        # Top-level UI boundary: report the failure on the page, don't crash.
        st.error(f"Error playing audio: {e}")