# Spaces: CR7CAD / Assignment1 (status: Sleeping)
#
# File size: 3,966 Bytes

import streamlit as st
from PIL import Image
import os
import tempfile
import subprocess
import sys

# Check for required dependencies and install if missing
def check_and_install_dependencies():
    """Verify that the required third-party packages can be imported.

    When every package imports, nothing is rendered and ``True`` is returned.
    Otherwise a warning and one ``pip install`` snippet per missing package
    are shown, together with a button that runs pip for each of them through
    the current interpreter.

    Returns:
        ``True`` if all required packages import, ``False`` otherwise (also
        after an installation attempt, since a restart is requested).
    """
    # Import name -> name to hand to pip.
    pip_name_by_module = {
        "transformers": "transformers",
        "sentencepiece": "sentencepiece",
        "gtts": "gTTS",
    }

    def _can_import(module_name):
        # Only ImportError counts as "missing"; anything else propagates.
        try:
            __import__(module_name)
        except ImportError:
            return False
        return True

    not_installed = [
        (module_name, dist_name)
        for module_name, dist_name in pip_name_by_module.items()
        if not _can_import(module_name)
    ]

    if not not_installed:
        return True

    st.warning("Missing required dependencies. Please install them before continuing.")
    for _, dist_name in not_installed:
        st.code(f"pip install {dist_name}", language="bash")

    if st.button("Install Dependencies Automatically"):
        with st.spinner("Installing dependencies..."):
            for _, dist_name in not_installed:
                pip_command = [sys.executable, "-m", "pip", "install", dist_name]
                try:
                    subprocess.check_call(pip_command)
                    st.success(f"Successfully installed {dist_name}")
                except Exception as err:
                    st.error(f"Failed to install {dist_name}: {err}")
        st.info("Please restart the application after installing dependencies.")
    return False

# function part
# img2text
def img2text(image_path):
    """Extract text from an image file with the Donut image-to-text pipeline.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The ``generated_text`` of the first pipeline result, or
        ``"No text detected"`` when the pipeline returns an empty result.
        On any failure the error is reported with ``st.error`` and a string
        of the form ``"Error: <message>"`` is returned instead of raising.
    """
    try:
        # Import here to ensure dependencies are checked first
        from transformers import pipeline

        # Load the image-to-text model (rebuilt on every call).
        image_to_text_model = pipeline("image-to-text", model="naver-clova-ix/donut-base")
        # Open the image through a context manager so the underlying file
        # handle is released as soon as inference is done, instead of
        # lingering until garbage collection.
        with Image.open(image_path) as image:
            result = image_to_text_model(image)
        # Get the generated text
        text = result[0]["generated_text"] if result else "No text detected"
        return text
    except Exception as e:
        st.error(f"Error processing image: {str(e)}")
        return f"Error: {str(e)}"

# text2story
def text2story(text):
    """Return the placeholder story: a fixed preamble followed by *text*.

    No generation happens yet; the extracted text is embedded as-is.
    """
    return f"Here's a story based on the text: {text}"

# text2audio using Google Text-to-Speech
def text2audio(story_text):
    """Synthesize *story_text* to speech with gTTS and save it to a temp file.

    Args:
        story_text: The English text to speak.

    Returns:
        Path of the generated audio file, or ``None`` if generation failed
        (the error is reported with ``st.error``). The file is created with
        ``delete=False``, so the caller is responsible for removing it.
    """
    temp_audio_path = None
    try:
        from gtts import gTTS

        # Reserve a unique file name. gTTS writes MP3 data, so the file gets
        # an ".mp3" suffix rather than a misleading ".wav" one.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_audio:
            temp_audio_path = temp_audio.name

        # Initialize gTTS and generate audio
        tts = gTTS(text=story_text, lang='en', slow=False)

        # Save to the temporary file
        tts.save(temp_audio_path)

        return temp_audio_path
    except Exception as e:
        # Do not leave an empty/partial temp file behind on failure.
        if temp_audio_path is not None:
            try:
                os.remove(temp_audio_path)
            except OSError:
                # Best-effort cleanup; the original error is what matters.
                pass
        st.error(f"Error generating audio: {str(e)}")
        return None

# main part
# Page setup: tab title/icon first, then the on-page header and subheader.
# NOTE(review): Streamlit's docs ask for set_page_config to run before other
# Streamlit commands — confirm before moving any st.* call above it.
st.set_page_config(page_title="Your Image to Audio Story",
                   page_icon="🦜")
st.header("Turn Your Image to Audio Story")
st.subheader("Using Donut model for text extraction")

# Check dependencies before proceeding. A False result means at least one
# required package could not be imported; the upload/processing UI that
# follows is gated on this flag.
dependencies_ok = check_and_install_dependencies()

if dependencies_ok:
    uploaded_file = st.file_uploader("Select an Image...", type=['png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp'])

    if uploaded_file is not None:
        # Save the uploaded file temporarily
        bytes_data = uploaded_file.getvalue()
        image_temp_path = os.path.join(tempfile.gettempdir(), uploaded_file.name)
        with open(image_temp_path, "wb") as file:
            file.write(bytes_data)

        # Display the uploaded image
        st.image(uploaded_file, caption="Uploaded Image",
                 use_column_width=True)

        # Stage 1: Image to Text
        with st.spinner('Processing img2text...'):
            extracted_text = img2text(image_temp_path)
            st.subheader("Extracted Text:")