Spaces:
Sleeping
Sleeping
File size: 3,966 Bytes
90bef38 b038974 cd79461 118cd25 e1ee436 7c5a1e4 b038974 118cd25 cd79461 7c5a1e4 b038974 7c5a1e4 90bef38 7c5a1e4 90bef38 118cd25 7c5a1e4 b038974 118cd25 cd79461 b038974 cd79461 1fb1e8e cd79461 b038974 7c5a1e4 90bef38 7c5a1e4 90bef38 118cd25 7c5a1e4 118cd25 7c5a1e4 118cd25 7c5a1e4 118cd25 5b9e396 118cd25 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
import streamlit as st
from PIL import Image
import os
import tempfile
import subprocess
import sys
# Check for required dependencies and install if missing
def check_and_install_dependencies():
    """Verify that the third-party packages this app needs can be imported.

    Every required module is imported by name. When at least one import
    fails, a warning and the matching ``pip install`` commands are rendered,
    together with a button that installs the missing packages into the
    running interpreter via ``python -m pip``.

    Returns:
        bool: ``True`` when every required package imported successfully,
        ``False`` when at least one is missing (including right after an
        automatic install attempt, because a restart is requested).
    """
    # Function-scope import keeps the block self-contained.
    from importlib import import_module

    # Maps importable module name -> name to hand to ``pip install``.
    required_packages = {
        "transformers": "transformers",
        "sentencepiece": "sentencepiece",
        "gtts": "gTTS",
    }

    missing_pip_names = []
    for module_name, pip_name in required_packages.items():
        try:
            import_module(module_name)
        except ImportError:
            missing_pip_names.append(pip_name)

    if not missing_pip_names:
        return True

    st.warning("Missing required dependencies. Please install them before continuing.")
    for pip_name in missing_pip_names:
        st.code(f"pip install {pip_name}", language="bash")

    if st.button("Install Dependencies Automatically"):
        with st.spinner("Installing dependencies..."):
            for pip_name in missing_pip_names:
                try:
                    # sys.executable targets the interpreter running this app.
                    subprocess.check_call([sys.executable, "-m", "pip", "install", pip_name])
                except (subprocess.SubprocessError, OSError) as e:
                    # One failed install must not stop the remaining ones.
                    st.error(f"Failed to install {pip_name}: {str(e)}")
                else:
                    st.success(f"Successfully installed {pip_name}")
        st.info("Please restart the application after installing dependencies.")

    return False
# function part
# img2text
def img2text(image_path):
    """Extract text from an image file with the Donut image-to-text pipeline.

    Args:
        image_path: Path of the image file to open.

    Returns:
        str: The ``generated_text`` of the first pipeline result,
        ``"No text detected"`` when the pipeline returns an empty result, or
        a string starting with ``"Error: "`` when anything fails (the error
        is also reported through ``st.error``).
    """
    try:
        # Imported lazily so the dependency check can run before this import.
        from transformers import pipeline

        # NOTE(review): the pipeline is rebuilt on every call; consider
        # caching it if repeated calls turn out to be slow.
        image_to_text_model = pipeline("image-to-text", model="naver-clova-ix/donut-base")

        # The context manager closes the underlying file handle once the
        # model has consumed the image, instead of leaving it open.
        with Image.open(image_path) as image:
            result = image_to_text_model(image)

        if not result:
            return "No text detected"
        return result[0]["generated_text"]
    except Exception as e:
        # UI boundary: surface the failure to the user rather than crash.
        st.error(f"Error processing image: {str(e)}")
        return f"Error: {str(e)}"
# text2story
def text2story(text):
    """Build the story string for the given extracted text.

    Placeholder implementation: no generation happens, the text is simply
    appended to a fixed introductory phrase.

    Args:
        text: The text extracted from the image.

    Returns:
        str: The introductory phrase followed by ``text``.
    """
    intro = "Here's a story based on the text: "
    return f"{intro}{text}"
# text2audio using Google Text-to-Speech
def text2audio(story_text):
    """Convert the story text to speech with gTTS and save it to a temp file.

    Args:
        story_text: The text to be spoken (English).

    Returns:
        str | None: Path of the generated audio file, or ``None`` when
        generation fails (the error is reported through ``st.error``). The
        file is not deleted automatically; the caller owns it.
    """
    temp_audio_path = None
    try:
        from gtts import gTTS

        # gTTS writes MP3 data, so the file gets an .mp3 suffix. The handle
        # is closed right away; only the reserved path is needed, and gTTS
        # writes to it by name.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
            temp_audio_path = temp_audio.name

        tts = gTTS(text=story_text, lang='en', slow=False)
        tts.save(temp_audio_path)
        return temp_audio_path
    except Exception as e:
        # Do not leave an empty or partial temp file behind on failure.
        if temp_audio_path is not None:
            try:
                os.remove(temp_audio_path)
            except OSError:
                pass
        st.error(f"Error generating audio: {str(e)}")
        return None
# main part
# Page metadata and headings.
st.set_page_config(
    page_title="Your Image to Audio Story",
    page_icon="🦜",
)
st.header("Turn Your Image to Audio Story")
st.subheader("Using Donut model for text extraction")

# The rest of the page is gated on the dependency check result.
dependencies_ok = check_and_install_dependencies()
if dependencies_ok:
uploaded_file = st.file_uploader("Select an Image...", type=['png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp'])
if uploaded_file is not None:
# Save the uploaded file temporarily
bytes_data = uploaded_file.getvalue()
image_temp_path = os.path.join(tempfile.gettempdir(), uploaded_file.name)
with open(image_temp_path, "wb") as file:
file.write(bytes_data)
# Display the uploaded image
st.image(uploaded_file, caption="Uploaded Image",
use_column_width=True)
# Stage 1: Image to Text
with st.spinner('Processing img2text...'):
extracted_text = img2text(image_temp_path)
st.subheader("Extracted Text:") |