File size: 3,966 Bytes
90bef38
 
b038974
cd79461
118cd25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e1ee436
7c5a1e4
 
 
b038974
118cd25
 
cd79461
7c5a1e4
 
 
 
 
 
 
 
 
b038974
7c5a1e4
 
90bef38
7c5a1e4
 
 
 
 
90bef38
118cd25
7c5a1e4
b038974
118cd25
 
cd79461
 
 
 
 
 
 
b038974
cd79461
 
1fb1e8e
cd79461
b038974
7c5a1e4
 
90bef38
7c5a1e4
 
 
 
 
90bef38
118cd25
 
7c5a1e4
118cd25
 
7c5a1e4
118cd25
 
 
 
 
 
7c5a1e4
118cd25
 
 
5b9e396
118cd25
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import streamlit as st
from PIL import Image
import os
import tempfile
import subprocess
import sys

# Check for required dependencies and install if missing
def check_and_install_dependencies():
    """Verify that the optional third-party packages can be imported.

    When every package imports, nothing is rendered and True is returned.
    Otherwise a warning, one ``pip install`` snippet per missing package and
    an install button are shown; pressing the button runs pip for each
    missing package with the current interpreter.

    Returns:
        True if all required packages imported successfully, False otherwise
        (including right after an automatic installation attempt).
    """
    # Maps the importable module name to the name pip knows the package by.
    import_to_pip = {
        "transformers": "transformers",
        "sentencepiece": "sentencepiece",
        "gtts": "gTTS",
    }

    def _can_import(module_name):
        # Probe by actually importing; only ImportError counts as "missing".
        try:
            __import__(module_name)
        except ImportError:
            return False
        return True

    to_install = [
        pip_name
        for module_name, pip_name in import_to_pip.items()
        if not _can_import(module_name)
    ]

    # Guard clause: nothing missing, so nothing to show.
    if not to_install:
        return True

    st.warning("Missing required dependencies. Please install them before continuing.")
    for pip_name in to_install:
        st.code(f"pip install {pip_name}", language="bash")

    if not st.button("Install Dependencies Automatically"):
        return False

    with st.spinner("Installing dependencies..."):
        for pip_name in to_install:
            # Use the running interpreter so pip targets the same environment.
            install_cmd = [sys.executable, "-m", "pip", "install", pip_name]
            try:
                subprocess.check_call(install_cmd)
                st.success(f"Successfully installed {pip_name}")
            except Exception as e:
                st.error(f"Failed to install {pip_name}: {e!s}")
    st.info("Please restart the application after installing dependencies.")
    return False

# function part
# img2text
def img2text(image_path):
    """Extract text from an image file using the Donut image-to-text pipeline.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The ``generated_text`` of the first pipeline result, the string
        "No text detected" when the pipeline returns an empty result, or
        "Error: <message>" when any step fails (the error is also shown in
        the page via ``st.error``).
    """
    try:
        # Import here to ensure dependencies are checked first
        from transformers import pipeline

        # NOTE(review): the pipeline is rebuilt on every call; consider
        # caching it if repeated model loading turns out to be slow.
        image_to_text_model = pipeline("image-to-text", model="naver-clova-ix/donut-base")

        # Open the image in a context manager so the underlying file handle
        # is released as soon as inference finishes, even on failure.
        with Image.open(image_path) as image:
            result = image_to_text_model(image)

        # An empty result means the model produced no output for this image.
        return result[0]["generated_text"] if result else "No text detected"
    except Exception as e:
        st.error(f"Error processing image: {str(e)}")
        return f"Error: {str(e)}"

# text2story
def text2story(text):
    """Return the placeholder story: a fixed lead-in followed by ``text``.

    No story generation happens yet; the extracted text is embedded as-is.
    """
    template = "Here's a story based on the text: {}"
    return template.format(text)

# text2audio using Google Text-to-Speech
def text2audio(story_text):
    """Synthesize speech for ``story_text`` with Google Text-to-Speech.

    Args:
        story_text: English text to be spoken.

    Returns:
        Path of a newly created temporary MP3 file holding the audio, or
        None when synthesis fails (the error is also shown in the page via
        ``st.error``). The caller is responsible for deleting the file.
    """
    temp_audio_path = None
    try:
        from gtts import gTTS

        # Reserve a unique file name. gTTS writes MP3 data, so the suffix
        # must be ".mp3" for players that rely on the extension.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_audio:
            temp_audio_path = temp_audio.name

        # Initialize gTTS and generate audio
        tts = gTTS(text=story_text, lang='en', slow=False)

        # Save to the temporary file
        tts.save(temp_audio_path)

        return temp_audio_path
    except Exception as e:
        # The file was created with delete=False; remove it so a failed
        # synthesis does not leave an orphaned temp file behind.
        if temp_audio_path is not None:
            try:
                os.remove(temp_audio_path)
            except OSError:
                pass
        st.error(f"Error generating audio: {str(e)}")
        return None

# main part
# Page setup: browser-tab title and icon first, then the on-page header and
# subheader.
st.set_page_config(page_title="Your Image to Audio Story",
                   page_icon="🦜")
st.header("Turn Your Image to Audio Story")
st.subheader("Using Donut model for text extraction")

# Check dependencies before proceeding
# check_and_install_dependencies() returns False when any required package
# could not be imported; the upload workflow below only runs when it is True.
dependencies_ok = check_and_install_dependencies()

if dependencies_ok:
    uploaded_file = st.file_uploader("Select an Image...", type=['png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp'])

    if uploaded_file is not None:
        # Save the uploaded file temporarily
        bytes_data = uploaded_file.getvalue()
        image_temp_path = os.path.join(tempfile.gettempdir(), uploaded_file.name)
        with open(image_temp_path, "wb") as file:
            file.write(bytes_data)

        # Display the uploaded image
        st.image(uploaded_file, caption="Uploaded Image",
                 use_column_width=True)

        # Stage 1: Image to Text
        with st.spinner('Processing img2text...'):
            extracted_text = img2text(image_temp_path)
            st.subheader("Extracted Text:")