File size: 2,019 Bytes
a4d3132 c63d328 a4d3132 16cda9f a4d3132 c63d328 a4d3132 1eebd1b 5f39df4 6be9290 04fe24c 5f39df4 733173b c63d328 a4d3132 74fc286 c63d328 a4d3132 c63d328 a4d3132 932211d 40d2871 c63d328 e35efdf c63d328 8a1f685 c63d328 f966229 86cd1ed c63d328 d46ae3c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
# import part
import hashlib
import tempfile
from pathlib import Path

import streamlit as st
from transformers import pipeline
# function part
# image2text
@st.cache_resource
def _load_image_captioner():
    """Build the image-captioning pipeline once and share it across reruns."""
    return pipeline("image-to-text",
                    model="nlpconnect/vit-gpt2-image-captioning")


def img2text(img):
    """Return a generated caption for an image.

    Args:
        img: Image input handed unchanged to the ``image-to-text`` pipeline;
            the caller in this file passes a path string.

    Returns:
        The ``"generated_text"`` value of the first pipeline result.
    """
    # The pipeline is obtained from a cached loader instead of being rebuilt
    # here: constructing it on every call reloads the model each time the
    # Streamlit script is re-executed.
    image_to_text_model = _load_image_captioner()
    return image_to_text_model(img)[0]["generated_text"]
# text2story
@st.cache_resource
def _load_story_generator():
    """Build the text-generation pipeline once and share it across reruns."""
    return pipeline("text-generation",
                    model="openai-community/gpt2")


def text2story(text):
    """Expand a short scenario into a story.

    Args:
        text: Scenario text; it is appended to a fixed fairy-tale opening to
            form the generation prompt.

    Returns:
        The ``'generated_text'`` value of the first generated sequence.
        NOTE(review): presumably this includes the prompt itself -- confirm
        against the pipeline's default ``return_full_text`` behaviour.
    """
    # Reuse the cached pipeline rather than reloading the model on every call.
    text_generation_model = _load_story_generator()
    story_text = f"Once upon a time in a land far, far away, {text}"
    generated_story = text_generation_model(story_text,
                                            max_length=100,
                                            num_return_sequences=1)
    return generated_story[0]['generated_text']
# text2audio
@st.cache_resource
def _load_speech_synthesizer():
    """Build the text-to-speech pipeline once and share it across reruns."""
    return pipeline("text-to-speech", model="facebook/mms-tts-eng")


def text2audio(story_text):
    """Synthesize speech for a piece of text.

    Args:
        story_text: Text handed unchanged to the ``text-to-speech`` pipeline.

    Returns:
        The raw pipeline output. The caller in this file reads its ``'audio'``
        and ``'sampling_rate'`` entries.
    """
    # Reuse the cached pipeline rather than reloading the model on every call.
    text_to_speech_model = _load_speech_synthesizer()
    return text_to_speech_model(story_text)
# main part
st.set_page_config(page_title="Your Image to Audio Story",
                   page_icon="*")
st.header("Turn Your Image to Audio Story")
uploaded_file = st.file_uploader("Select an Image...", type=["jpg", "png", "jpeg"])

if uploaded_file is not None:
    bytes_data = uploaded_file.getvalue()
    st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)

    # Streamlit re-executes this script on every widget interaction, including
    # the "Play Audio" click below. Results are therefore kept in the session
    # state, keyed by a digest of the uploaded bytes, so a click replays the
    # story already on screen instead of recomputing all three stages.
    upload_digest = hashlib.sha256(bytes_data).hexdigest()
    results = st.session_state.get("story_results")
    if results is None or results.get("digest") != upload_digest:
        results = {"digest": upload_digest}

    # stage 1
    st.text('Processing img2text...')
    if "scenario" not in results:
        # Write the upload into a throwaway directory under a fixed base name:
        # the client-supplied filename is untrusted and must not pick the
        # path, and the file is removed as soon as the caption is computed.
        with tempfile.TemporaryDirectory() as tmp_dir:
            image_path = Path(tmp_dir) / f"upload{Path(uploaded_file.name).suffix}"
            image_path.write_bytes(bytes_data)
            results["scenario"] = img2text(str(image_path))
    st.write(results["scenario"])

    # stage 2
    st.text('Generating a story...')
    if "story" not in results:
        # Use the scenario from img2text
        results["story"] = text2story(results["scenario"])
    st.write(results["story"])

    # stage 3
    st.text('Generating audio data...')
    if "audio" not in results:
        results["audio"] = text2audio(results["story"])

    # Persist the completed results for subsequent reruns of this session.
    st.session_state["story_results"] = results

    if st.button("Play Audio"):
        audio_data = results["audio"]
        st.audio(audio_data['audio'],
                 format="audio/wav",
                 start_time=0,
                 sample_rate=audio_data['sampling_rate'])