Spaces:
Sleeping
Sleeping
File size: 2,203 Bytes
f005303 2cd353e 6a2dbee 2cd353e 6a2dbee 2cd353e 6a2dbee 2cd353e 6a2dbee 2cd353e 6a2dbee 2cd353e 6a2dbee 2cd353e 6a2dbee 2cd353e 6a2dbee 2cd353e 6a2dbee 2cd353e 6a2dbee 2cd353e 6a2dbee 2cd353e 6a2dbee 2cd353e 6a2dbee 2cd353e 6a2dbee 2cd353e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import streamlit as st
from transformers import pipeline
from PIL import Image
# Creates a brief description for the pictures
@st.cache_resource(show_spinner=False)
def _load_caption_pipeline():
    """Build the BLIP image-captioning pipeline once and reuse it.

    Streamlit re-executes the script on every widget interaction, so
    building the pipeline inside ``generate_caption`` would reload the
    model on every rerun. ``st.cache_resource`` keeps a single shared
    instance instead. The built-in cache spinner is disabled because the
    caller already shows its own spinner.
    """
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")


def generate_caption(image):
    """Return a short text description of ``image``.

    Args:
        image: The picture to describe. The UI code in this file passes a
            PIL image opened from the uploaded file.

    Returns:
        The ``"generated_text"`` field of the first result produced by
        the image-to-text pipeline.
    """
    with st.spinner("Analysing the Pictures for Key Message..."):
        image_to_text = _load_caption_pipeline()
        caption = image_to_text(image)[0]["generated_text"]
    return caption
# Builds a story from the picture's description
@st.cache_resource(show_spinner=False)
def _load_story_pipeline():
    """Build the text-generation pipeline once and reuse it.

    Cached with ``st.cache_resource`` so the model is not reloaded every
    time Streamlit reruns the script. The built-in cache spinner is
    disabled because the caller already shows its own spinner.
    """
    # NOTE(review): the repository name suggests GGUF-format weights; confirm
    # that `pipeline` can load this checkpoint as configured here.
    return pipeline("text-generation", model="TheBloke/phi-2-GGUF")


def generate_story(caption):
    """Expand an image caption into a longer story.

    Args:
        caption: Text used as the prompt for the text-generation model.

    Returns:
        The ``"generated_text"`` field of the first generation result.
    """
    with st.spinner("Enhancing the Story for better Details..."):
        pipe = _load_story_pipeline()
        # NOTE(review): text-generation pipelines presumably echo the prompt at
        # the start of `generated_text` by default - confirm that is intended.
        story = pipe(caption)[0]['generated_text']
    return story
# Turns the story into audio
@st.cache_resource(show_spinner=False)
def _load_speech_pipeline():
    """Build the text-to-speech pipeline once and reuse it.

    Cached with ``st.cache_resource`` so the model is not reloaded every
    time Streamlit reruns the script. The built-in cache spinner is
    disabled because the caller already shows its own spinner.
    """
    # NOTE(review): confirm this checkpoint is supported by the transformers
    # text-to-speech pipeline.
    return pipeline("text-to-speech", model="hexgrad/Kokoro-82M")


def generate_audio(story):
    """Synthesise speech for ``story``.

    Args:
        story: The text to read aloud.

    Returns:
        The raw output of the text-to-speech pipeline. The UI code in this
        file reads its ``'audio'`` and ``'sampling_rate'`` entries.
    """
    with st.spinner("Turning story into News audio..."):
        pipe = _load_speech_pipeline()
        audio = pipe(story)
    return audio
# Streamlit UI: a simple interface that turns an uploaded photo into audio
# Displays the title
st.title("Tool for the Reporter - Turning the News Photo into Audio")
# Describes the app for users
st.write("Please upload the News Photo within 200MB")
# Allows picture uploads
uploaded_file = st.file_uploader("Upload the Photo below", type=["png", "jpg", "jpeg"])

if uploaded_file is not None:
    # Shows the uploaded picture
    image = Image.open(uploaded_file)
    st.image(image, caption="Your Picture!", use_container_width=True)

    # Streamlit re-executes this whole script on every widget interaction,
    # including the "Hear the News" click below. Without a cache, that click
    # would run captioning, story generation and speech synthesis again
    # before playing anything. Results are therefore kept in session state
    # and only recomputed when a different file is uploaded.
    upload_id = getattr(uploaded_file, "file_id", None) or (
        uploaded_file.name,
        uploaded_file.size,
    )
    results = st.session_state.get("news_results")
    if results is None or results.get("upload_id") != upload_id:
        results = {"upload_id": upload_id}
        st.session_state["news_results"] = results

    # Gets the picture's description
    if "caption" not in results:
        results["caption"] = generate_caption(image)
    image_caption = results["caption"]
    st.subheader("Photo Description:")
    st.write(image_caption)

    # Generates the news story from the description
    if "story" not in results:
        results["story"] = generate_story(image_caption)
    story_telling = results["story"]
    st.subheader("The News:")
    st.write(story_telling)

    # Generates the audio up front so it is ready when the button is pressed
    if "audio" not in results:
        results["audio"] = generate_audio(story_telling)
    audio = results["audio"]

    if st.button("Hear the News"):
        st.audio(
            audio['audio'],
            format="audio/wav",
            start_time=0,
            sample_rate=audio['sampling_rate'],
        )