Spaces:

CR7CAD
/

Assignment1

Sleeping

App Files Files Community

Assignment1 / app.py

CR7CAD

Update app.py

118cd25 verified 10 months ago

raw

history blame

3.97 kB

	import streamlit as st
	from PIL import Image
	import os
	import tempfile
	import subprocess
	import sys

	# Check for required dependencies and install if missing
	def check_and_install_dependencies():
	    """Check that the required third-party modules import, offering to install any that do not.

	    Every required module is imported by name. When at least one import
	    fails, a warning and the matching ``pip install`` commands are shown,
	    together with a button that runs pip through the current interpreter
	    (``sys.executable -m pip install <name>``).

	    Returns:
	        bool: ``True`` when every required module imports. ``False`` when
	        any is missing, including right after an install attempt, because
	        the user is asked to restart the application.
	    """
	    # Function-scope import keeps this block self-contained.
	    import importlib

	    # Import name -> name to hand to pip (they differ for gTTS).
	    required_packages = {
	        "transformers": "transformers",
	        "sentencepiece": "sentencepiece",
	        "gtts": "gTTS",
	    }

	    missing_pip_names = []
	    for module_name, pip_name in required_packages.items():
	        try:
	            importlib.import_module(module_name)
	        except ImportError:
	            missing_pip_names.append(pip_name)

	    if not missing_pip_names:
	        return True

	    st.warning("Missing required dependencies. Please install them before continuing.")
	    for pip_name in missing_pip_names:
	        st.code(f"pip install {pip_name}", language="bash")

	    if st.button("Install Dependencies Automatically"):
	        with st.spinner("Installing dependencies..."):
	            for pip_name in missing_pip_names:
	                try:
	                    subprocess.check_call([sys.executable, "-m", "pip", "install", pip_name])
	                except (subprocess.CalledProcessError, OSError) as e:
	                    # Non-zero pip exit status or failure to launch the interpreter.
	                    st.error(f"Failed to install {pip_name}: {e}")
	                else:
	                    st.success(f"Successfully installed {pip_name}")
	        st.info("Please restart the application after installing dependencies.")

	    # Still False after an install attempt: the caller must not continue
	    # until the application has been restarted.
	    return False

	# function part
	# img2text
	def img2text(image_path):
	    """Extract text from an image file with the Donut image-to-text pipeline.

	    Args:
	        image_path: Path of the image file to open.

	    Returns:
	        str: The ``generated_text`` of the first pipeline result,
	        ``"No text detected"`` when the pipeline returns an empty result, or
	        ``"Error: <message>"`` when any step fails (the error is also shown
	        through ``st.error``).
	    """
	    try:
	        # Import here to ensure dependencies are checked first
	        from transformers import pipeline

	        # NOTE: the pipeline is constructed on every call.
	        image_to_text_model = pipeline("image-to-text", model="naver-clova-ix/donut-base")

	        # The context manager closes the image's file handle once inference is done.
	        with Image.open(image_path) as image:
	            result = image_to_text_model(image)

	        return result[0]["generated_text"] if result else "No text detected"
	    except Exception as e:
	        # UI boundary: any failure (import, model load, decoding, inference)
	        # is reported to the user and returned as text instead of raised.
	        st.error(f"Error processing image: {e}")
	        return f"Error: {e}"

	# text2story
	def text2story(text: str) -> str:
	    """Return the placeholder story for *text*.

	    No story generation happens yet: the extracted text is embedded
	    unchanged in a fixed introductory sentence.
	    """
	    return f"Here's a story based on the text: {text}"

	# text2audio using Google Text-to-Speech
	def text2audio(story_text):
	    """Synthesize *story_text* to speech with gTTS and save it to a temporary file.

	    Args:
	        story_text: English text to speak.

	    Returns:
	        str | None: Path of the generated audio file, or ``None`` when
	        synthesis fails (the error is shown through ``st.error``). The file
	        is created with ``delete=False``, so the caller owns it and is
	        responsible for removing it.
	    """
	    temp_audio_path = None
	    try:
	        from gtts import gTTS

	        # gTTS writes MP3 data, so the file is labelled ".mp3" rather than ".wav".
	        # NOTE(review): confirm no caller relies on the old ".wav" extension.
	        # Only the name is needed here; the handle is closed before gTTS
	        # writes to the path.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
	            temp_audio_path = temp_audio.name

	        # Initialize gTTS and generate audio
	        tts = gTTS(text=story_text, lang="en", slow=False)
	        tts.save(temp_audio_path)

	        return temp_audio_path
	    except Exception as e:
	        # UI boundary: report any failure instead of raising.
	        if temp_audio_path is not None:
	            # Best-effort cleanup so a failed synthesis does not leave an
	            # empty temporary file behind.
	            try:
	                os.remove(temp_audio_path)
	            except OSError:
	                pass
	        st.error(f"Error generating audio: {e}")
	        return None

	# main part: page setup and dependency gate, executed at module level
	# Page configuration: title and icon.
	st.set_page_config(page_title="Your Image to Audio Story",
	page_icon="🦜")
	# Page heading and sub-heading.
	st.header("Turn Your Image to Audio Story")
	st.subheader("Using Donut model for text extraction")

	# Check dependencies before proceeding.
	# check_and_install_dependencies() returns False when a required package
	# is missing; the result gates the upload/processing UI that follows.
	dependencies_ok = check_and_install_dependencies()

	if dependencies_ok:
	uploaded_file = st.file_uploader("Select an Image...", type=['png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp'])

	if uploaded_file is not None:
	# Save the uploaded file temporarily
	bytes_data = uploaded_file.getvalue()
	image_temp_path = os.path.join(tempfile.gettempdir(), uploaded_file.name)
	with open(image_temp_path, "wb") as file:
	file.write(bytes_data)

	# Display the uploaded image
	st.image(uploaded_file, caption="Uploaded Image",
	use_column_width=True)

	# Stage 1: Image to Text
	with st.spinner('Processing img2text...'):
	extracted_text = img2text(image_temp_path)
	st.subheader("Extracted Text:")