File size: 2,203 Bytes
f005303
2cd353e
 
 
6a2dbee
2cd353e
6a2dbee
2cd353e
6a2dbee
2cd353e
 
 
 
 
6a2dbee
 
 
2cd353e
 
 
6a2dbee
2cd353e
6a2dbee
 
 
2cd353e
 
 
6a2dbee
2cd353e
6a2dbee
 
2cd353e
6a2dbee
 
2cd353e
 
6a2dbee
2cd353e
 
 
 
 
 
 
 
6a2dbee
2cd353e
 
6a2dbee
2cd353e
6a2dbee
2cd353e
 
 
 
6a2dbee
2cd353e
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import streamlit as st
from transformers import pipeline
from PIL import Image

# Loads the BLIP captioning model once and reuses it across reruns
@st.cache_resource(show_spinner=False)
def _load_captioner():
    """Return the image-to-text pipeline, building it only on first use.

    Streamlit re-executes the script on every user interaction, so the
    pipeline is cached as a resource instead of being re-created per call.
    The built-in cache spinner is disabled because the caller already
    shows its own spinner.
    """
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")


# Creates a brief description for the pictures
def generate_caption(image) -> str:
    """Return a short text caption describing ``image``.

    Args:
        image: The picture to describe; it is handed unchanged to the
            image-to-text pipeline (the UI code passes a PIL image).

    Returns:
        The ``generated_text`` field of the first result returned by the
        pipeline.
    """
    with st.spinner("Analysing the Pictures for Key Message..."):
        # Reuses the cached BLIP model to examine and describe the picture
        image_to_text = _load_captioner()
        caption = image_to_text(image)[0]["generated_text"]
    return caption

# Loads the text generation model once and reuses it across reruns
@st.cache_resource(show_spinner=False)
def _load_story_generator():
    """Return the text-generation pipeline, building it only on first use.

    Streamlit re-executes the script on every user interaction, so the
    pipeline is cached as a resource instead of being re-created per call.
    """
    # NOTE(review): this repo id ends in "-GGUF"; confirm that transformers
    # can load it through a plain pipeline() call (GGUF checkpoints may need
    # an explicit gguf_file argument or a different repo) -- TODO verify.
    return pipeline("text-generation", model="TheBloke/phi-2-GGUF")


# Builds a story from the picture's description
def generate_story(caption: str) -> str:
    """Return generated text that uses ``caption`` as the prompt.

    Args:
        caption: The picture description passed to the text-generation
            pipeline.

    Returns:
        The ``generated_text`` field of the first result returned by the
        pipeline.
    """
    with st.spinner("Enhancing the Story for better Details..."):
        # Uses the cached text generation model to create a story based on the description
        pipe = _load_story_generator()
        story = pipe(caption)[0]['generated_text']
    return story

# Loads the speech model once and reuses it across reruns
@st.cache_resource(show_spinner=False)
def _load_speech_synthesizer():
    """Return the text-to-speech pipeline, building it only on first use.

    Streamlit re-executes the script on every user interaction, so the
    pipeline is cached as a resource instead of being re-created per call.
    """
    # NOTE(review): confirm this model is loadable through the transformers
    # "text-to-speech" pipeline -- TODO verify against the model card.
    return pipeline("text-to-speech", model="hexgrad/Kokoro-82M")


# Turns the story into audio
def generate_audio(story: str):
    """Synthesize speech for ``story``.

    Args:
        story: The text to be spoken.

    Returns:
        The raw result of the text-to-speech pipeline. The UI code reads
        its ``'audio'`` and ``'sampling_rate'`` entries.
    """
    with st.spinner("Turning story into News audio..."):
        # Uses the cached speech model to turn description into audio
        pipe = _load_speech_synthesizer()
        audio = pipe(story)
    return audio

# Streamlit UI: Makes a simple interface to generate the audio

# Displays the title
st.title("Tool for the Reporter - Turning the News Photo into Audio")

# Describes the app for users
st.write("Please upload the News Photo within 200MB")

# Allows picture uploads
uploaded_file = st.file_uploader("Upload the Photo below", type=["png", "jpg", "jpeg"])

if uploaded_file is not None:
    # Shows the uploaded picture
    image = Image.open(uploaded_file)
    st.image(image, caption="Your Picture!", use_container_width=True)

    # Gets the picture's description
    image_caption = generate_caption(image)
    st.subheader("Photo Description:")
    st.write(image_caption)

    # Generates the News description from the caption
    story_telling = generate_story(image_caption)
    st.subheader("The News:")
    st.write(story_telling)

    # Generates audio only after the button is pressed. Clicking a button
    # re-runs the script, so synthesizing before the click would do the
    # speech generation twice (once before the click, once after it).
    if st.button("Hear the News"):
        audio = generate_audio(story_telling)
        st.audio(
            audio["audio"],
            format="audio/wav",
            start_time=0,
            sample_rate=audio["sampling_rate"],
        )