Spaces:

CR7CAD
/

Assignment1

Sleeping

App Files Files Community

CR7CAD committed on Mar 8

Commit

118cd25

verified ·

1 Parent(s): cd79461

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -60

app.py CHANGED Viewed

@@ -1,21 +1,48 @@
 import streamlit as st
-from transformers import pipeline
 from PIL import Image
 import os
-import torch
-from gtts import gTTS
 import tempfile
 # function part
 # img2text
 def img2text(image_path):
     try:
-        # Check if sentencepiece is installed
-        try:
-            import sentencepiece
-        except ImportError:
-            st.error("sentencepiece is not installed. Please install it with: pip install sentencepiece")
-            return "Error: sentencepiece not installed"
         # Load the image-to-text model
         image_to_text_model = pipeline("image-to-text", model="naver-clova-ix/donut-base")
@@ -33,13 +60,14 @@ def img2text(image_path):
 # text2story
 def text2story(text):
     # For now, just return the extracted text as the story
-    # This function can be expanded later with more sophisticated story generation
     story_text = f"Here's a story based on the text: {text}"
     return story_text
-# text2audio using Google Text-to-Speech instead of transformers
 def text2audio(story_text):
     try:
         # Create a temporary file
         temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
         temp_audio_path = temp_audio.name
@@ -62,56 +90,24 @@ st.set_page_config(page_title="Your Image to Audio Story",
 st.header("Turn Your Image to Audio Story")
 st.subheader("Using Donut model for text extraction")
-uploaded_file = st.file_uploader("Select an Image...", type=['png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp'])
-if uploaded_file is not None:
-    # Save the uploaded file temporarily
-    bytes_data = uploaded_file.getvalue()
-    image_temp_path = os.path.join(tempfile.gettempdir(), uploaded_file.name)
-    with open(image_temp_path, "wb") as file:
-        file.write(bytes_data)
-    # Display the uploaded image
-    st.image(uploaded_file, caption="Uploaded Image",
-             use_column_width=True)
-    # Stage 1: Image to Text
-    with st.spinner('Processing img2text...'):
-        extracted_text = img2text(image_temp_path)
-        st.subheader("Extracted Text:")
-        st.write(extracted_text)
-    # Stage 2: Text to Story
-    with st.spinner('Generating a story...'):
-        story = text2story(extracted_text)
-        st.subheader("Generated Story:")
-        st.write(story)
-    # Stage 3: Story to Audio data
-    audio_file_path = None
-    with st.spinner('Generating audio data...'):
-        audio_file_path = text2audio(story)
-    # Remove the temporary image file
-    if os.path.exists(image_temp_path):
-        os.remove(image_temp_path)
-    # Play button
-    if st.button("Play Audio"):
-        if audio_file_path and os.path.exists(audio_file_path):
-            # Play the generated audio
-            with open(audio_file_path, "rb") as audio_file:
-                audio_bytes = audio_file.read()
-            st.audio(audio_bytes, format="audio/wav")
-            # Clean up the audio file after playing
-            try:
-                os.remove(audio_file_path)
-            except:
-                pass
-        else:
-            st.warning("Audio generation failed. Playing a placeholder audio.")
-            try:
-                st.audio("kids_playing_audio.wav")
-            except FileNotFoundError:
-                st.error("Placeholder audio file not found. Audio playback is unavailable.")

 import streamlit as st
 from PIL import Image
 import os
 import tempfile
+import subprocess
+import sys
+# Check that transformers, sentencepiece and gtts can be imported; if any is missing, show the pip commands and offer a button that installs them. Returns True only when nothing is missing.
+def check_and_install_dependencies():
+    required_packages = {  # maps the importable module name to the name passed to `pip install`
+        "transformers": "transformers",
+        "sentencepiece": "sentencepiece",
+        "gtts": "gTTS"
+    }
+    missing_packages = []
+    for package, pip_name in required_packages.items():
+        try:
+            __import__(package)  # availability is probed by actually importing the module
+        except ImportError:
+            missing_packages.append((package, pip_name))
+    if missing_packages:
+        st.warning("Missing required dependencies. Please install them before continuing.")
+        for package, pip_name in missing_packages:
+            st.code(f"pip install {pip_name}", language="bash")
+        if st.button("Install Dependencies Automatically"):
+            with st.spinner("Installing dependencies..."):
+                for package, pip_name in missing_packages:
+                    try:
+                        subprocess.check_call([sys.executable, "-m", "pip", "install", pip_name])  # runs pip with the interpreter that is executing this app
+                        st.success(f"Successfully installed {pip_name}")
+                    except Exception as e:  # a failed install is reported and the loop moves on to the next package
+                        st.error(f"Failed to install {pip_name}: {str(e)}")
+            st.info("Please restart the application after installing dependencies.")
+        return False  # False whenever something was missing, even after an install attempt; the user is asked to restart
+    return True
 # function part
 # img2text
 def img2text(image_path):
     try:
+        # Import here to ensure dependencies are checked first
+        from transformers import pipeline
         # Load the image-to-text model
         image_to_text_model = pipeline("image-to-text", model="naver-clova-ix/donut-base")
 # text2story
 def text2story(text):
     """Return the placeholder story: the extracted text wrapped in a fixed sentence."""
     return f"Here's a story based on the text: {text}"
+# text2audio using Google Text-to-Speech
 def text2audio(story_text):
     try:
+        from gtts import gTTS
         # Create a temporary file
         temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
         temp_audio_path = temp_audio.name
 st.header("Turn Your Image to Audio Story")
 st.subheader("Using Donut model for text extraction")
+# Check dependencies before proceeding
+dependencies_ok = check_and_install_dependencies()
+if dependencies_ok:
+    uploaded_file = st.file_uploader("Select an Image...", type=['png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp'])
+    if uploaded_file is not None:
+        # Save the uploaded file temporarily
+        bytes_data = uploaded_file.getvalue()
+        image_temp_path = os.path.join(tempfile.gettempdir(), uploaded_file.name)
+        with open(image_temp_path, "wb") as file:
+            file.write(bytes_data)
+        # Display the uploaded image
+        st.image(uploaded_file, caption="Uploaded Image",
+                 use_column_width=True)
+        # Stage 1: Image to Text
+        with st.spinner('Processing img2text...'):
+            extracted_text = img2text(image_temp_path)
+            st.subheader("Extracted Text:")