Spaces:
Sleeping
Sleeping
File size: 3,017 Bytes
cd245d5 90bef38 8d5fabf cd245d5 118cd25 cd245d5 a084b90 cd245d5 8d5fabf cd245d5 b9c5fcd f006a50 b9c5fcd cd245d5 f006a50 cd245d5 f006a50 cd245d5 f006a50 cd245d5 f006a50 b9c5fcd f006a50 cd245d5 8d5fabf cd245d5 f006a50 cd245d5 8d5fabf cd245d5 8d5fabf cd245d5 f006a50 4e37056 f006a50 a084b90 cd245d5 f006a50 8d5fabf f006a50 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
# import part
import streamlit as st
from transformers import pipeline
import os
# function part
# img2text
def img2text(image_path):
    """Return a caption for the image stored at ``image_path``.

    Builds a Hugging Face ``image-to-text`` pipeline backed by the
    ``dumperize/movie-picture-captioning`` model, runs it on the given
    path, and returns the ``generated_text`` field of the first result.
    """
    captioner = pipeline(
        "image-to-text",
        model="dumperize/movie-picture-captioning",
    )
    results = captioner(image_path)
    first_result = results[0]
    return first_result["generated_text"]
# text2story
def text2story(text, max_words=100):
    """Generate a short children's story seeded by ``text``.

    A ``text-generation`` pipeline is prompted with a one-line instruction
    that embeds ``text`` and ends with "Once upon a time, ". The echoed
    prompt is replaced by that opening phrase, and the result is capped at
    ``max_words`` whitespace-separated words.

    Args:
        text: Seed text for the story (the caller passes an image caption).
        max_words: Upper bound on the number of words returned. Stories
            shorter than this are returned unchanged; nothing pads them.

    Returns:
        The story text, starting with "Once upon a time, " whenever the
        model output contains the prompt.
    """
    # NOTE(review): this is an MLX-community checkpoint; confirm it loads
    # with the transformers pipeline in the deployment environment.
    generator = pipeline(
        "text-generation",
        model="mlx-community/Llama-3.2-1B-Instruct-4bit",
    )

    # The original issued a throwaway "Who are you?" chat request here and
    # discarded its output; it only added a full extra inference pass.

    prompt = f"Write a fun children's story based on this: {text}. Once upon a time, "

    story_result = generator(
        prompt,
        max_length=200,
        num_return_sequences=1,
        temperature=0.8,
        top_k=50,
        top_p=0.95,
        do_sample=True,
    )

    # The generated text is expected to echo the prompt; keep only the
    # story opening so the instruction sentence is not shown or narrated.
    story_text = story_result[0]["generated_text"]
    story_text = story_text.replace(prompt, "Once upon a time, ")

    # Cap (not pad) the story length: truncate to at most ``max_words`` words.
    words = story_text.split()
    if len(words) > max_words:
        story_text = " ".join(words[:max_words])
    return story_text
# text2audio
def text2audio(story_text):
    """Synthesize speech for ``story_text``.

    Runs a ``text-to-speech`` pipeline using the
    ``espnet/kan-bayashi_ljspeech_vits`` model and returns a dict holding
    the ``"audio"`` and ``"sampling_rate"`` entries of the pipeline output.
    """
    synthesizer = pipeline(
        "text-to-speech",
        model="espnet/kan-bayashi_ljspeech_vits",
    )
    speech = synthesizer(story_text)
    # Pass through only the two fields the caller needs for playback.
    return {key: speech[key] for key in ("audio", "sampling_rate")}
# Function to save temporary image file
def save_uploaded_image(uploaded_file):
    """Write an uploaded file into the local ``temp`` directory.

    Args:
        uploaded_file: Object exposing a ``name`` attribute and a
            ``getvalue()`` method that returns the file's bytes.

    Returns:
        The path of the written file, ``temp/<base name of the upload>``.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs("temp", exist_ok=True)

    # The name comes from the client, so keep only its final component;
    # otherwise something like "../x.png" would be written outside "temp".
    # Backslashes are normalized so Windows-style paths are stripped too.
    safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        safe_name = "upload"

    image_path = os.path.join("temp", safe_name)
    with open(image_path, "wb") as f:
        f.write(uploaded_file.getvalue())
    return image_path
# main part
# Page flow: upload an image, caption it, expand the caption into a story,
# synthesize narration, and offer playback.
st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
st.header("Turn Your Image to Audio Story")
uploaded_file = st.file_uploader("Select an Image...")

if uploaded_file is not None:
    # Display the uploaded image
    st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)

    # Save the image temporarily so the captioning stage can read it by path
    image_path = save_uploaded_image(uploaded_file)
    try:
        # Stage 1: Image to Text
        st.text('Processing img2text...')
        caption = img2text(image_path)
        st.write(caption)

        # Stage 2: Text to Story
        st.text('Generating a story...')
        story = text2story(caption)
        st.write(story)

        # Stage 3: Story to Audio data
        st.text('Generating audio data...')
        audio_data = text2audio(story)

        # NOTE(review): Streamlit reruns the script on widget interaction, so
        # pressing this button appears to repeat all three stages above;
        # consider keeping results in st.session_state — confirm and decide.
        if st.button("Play Audio"):
            st.audio(
                audio_data["audio"],
                format="audio/wav",
                start_time=0,
                sample_rate=audio_data["sampling_rate"]
            )
    finally:
        # Clean up the temporary file even when a stage above raises.
        # Removal is best-effort, but only filesystem errors are ignored.
        try:
            os.remove(image_path)
        except OSError:
            pass