File size: 2,292 Bytes
38da107
598ecb7
b6eef41
2d5370d
 
 
598ecb7
38da107
 
2d5370d
38da107
2d5370d
bc98b06
38da107
2d5370d
 
 
598ecb7
38da107
76bead5
38da107
2d5370d
bc98b06
38da107
 
 
76bead5
2d5370d
 
 
598ecb7
38da107
598ecb7
38da107
bc98b06
 
76bead5
 
598ecb7
b6eef41
e698bdd
bc98b06
 
e698bdd
b6eef41
 
e698bdd
 
bc98b06
e698bdd
b6eef41
 
2d5370d
 
 
 
 
38da107
 
bc98b06
86cd9e8
e698bdd
38da107
 
bc98b06
86cd9e8
bc98b06
e698bdd
38da107
 
bc98b06
86cd9e8
38da107
bc98b06
e698bdd
38da107
 
b6eef41
 
598ecb7
bc98b06
b6eef41
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82

import streamlit as st
from transformers import pipeline
from gtts import gTTS
import tempfile
import os

# function part
# img2text
def img2text(img_path):
    """Caption the image stored at ``img_path``.

    Args:
        img_path: Path of the image file handed to the captioning pipeline.

    Returns:
        The ``generated_text`` entry of the first result the pipeline yields.
    """
    # Model picked by the original author as relatively fast and accurate.
    model_name = "nlpconnect/vit-gpt2-image-captioning"

    # NOTE: a fresh pipeline is constructed on every call.
    caption_pipeline = pipeline("image-to-text", model=model_name)

    predictions = caption_pipeline(img_path)
    first_prediction = predictions[0]
    return first_prediction["generated_text"]

# text2story
def text2story(scenario):
    """Turn a short scenario description into a story.

    Args:
        scenario: Text (here, an image caption) to base the story on.

    Returns:
        The ``generated_text`` entry of the first sequence produced by the
        text-generation pipeline, returned unmodified.
    """
    story_pipeline = pipeline(
        "text-generation",
        model="gpt2",            # relatively small but fast
        max_length=200,          # maximum story length
        num_return_sequences=1,  # number of variants to generate
    )

    prompt = f"Create a children's story based on: {scenario}"
    sequences = story_pipeline(prompt)
    return sequences[0]["generated_text"]

# text2audio
def text2audio(story_text):
    """Convert ``story_text`` to speech and return the path of the audio file.

    Args:
        story_text: Text to be spoken (synthesized with ``lang="en"``).

    Returns:
        Path of a newly created ``.mp3`` temporary file. The file is created
        with ``delete=False``, so the caller is responsible for removing it.

    Raises:
        Whatever ``gTTS`` or ``gTTS.save`` raises; in the latter case the
        temporary file is removed before the exception propagates.
    """
    # Build the synthesizer first so that a constructor failure does not
    # leave an orphaned temporary file behind.
    tts = gTTS(text=story_text, lang="en")

    # Reserve a unique .mp3 path and close the handle immediately: the
    # original kept the handle open forever (descriptor leak). gTTS writes
    # to the file by name afterwards.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as audio_file:
        audio_path = audio_file.name

    try:
        tts.save(audio_path)
    except Exception:
        # Do not leak the reserved file when synthesis/saving fails.
        os.unlink(audio_path)
        raise
    return audio_path

def main():
    """Render the page: upload an image, caption it, write a story, narrate it.

    The uploaded image is copied to a private temporary file and passed
    through ``img2text`` -> ``text2story`` -> ``text2audio``; the caption,
    story and (on button press) the audio are shown on the page. Temporary
    files are removed when the run finishes, even if a stage raises.
    """
    st.set_page_config(
        page_title="Image to Story",
        page_icon="📖"
    )
    st.header("Upload Your Image")

    uploaded_file = st.file_uploader(
        "Choose Image",
        type=["jpg", "png", "jpeg"]
    )

    if not uploaded_file:
        return

    # Only the extension of the client-supplied file name is reused. Building
    # the path from the full name (as before) lets a crafted name escape the
    # temp directory and makes concurrent uploads with equal names collide.
    suffix = os.path.splitext(uploaded_file.name)[1]

    temp_img = None
    audio_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as img_file:
            temp_img = img_file.name
            img_file.write(uploaded_file.getvalue())

        st.image(uploaded_file)

        # Stage 1: Image to Text
        with st.status("🖼️ Processing image..."):
            scenario = img2text(temp_img)
            st.write("Image Caption:", scenario)

        # Stage 2: Text to Story
        with st.status("📖 Generating story..."):
            story = text2story(scenario)
            st.subheader("Story")
            st.write(story)

        # Stage 3: Story to Audio data
        with st.status("🔊 Converting audio..."):
            audio_path = text2audio(story)

        # Play button
        # NOTE(review): a button press makes Streamlit rerun the script, so
        # the three stages above presumably execute again - consider caching.
        if st.button("▶️ Play Audio Story"):
            # Hand Streamlit the bytes rather than the path, so playback does
            # not depend on a file that is deleted right below.
            with open(audio_path, "rb") as audio_stream:
                st.audio(audio_stream.read(), format="audio/mp3")
    finally:
        # Cleanup: runs on success and on failure alike.
        for path in (temp_img, audio_path):
            if path is None:
                continue
            try:
                os.unlink(path)
            except FileNotFoundError:
                # Already gone; nothing left to clean up.
                pass

# Script entry point: build the page only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()