Spaces:

CR7CAD
/

Assignment1

Sleeping

App Files Files Community

CR7CAD committed on Mar 8, 2025

Commit

90bef38

verified ·

1 Parent(s): 7704b1e

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -110

app.py CHANGED Viewed

@@ -1,119 +1,74 @@
-import os
-import argparse
-from PIL import Image
 from transformers import pipeline
-def load_model():
-    """Load the image-to-text model."""
-    print("Loading image-to-text model...")
-    try:
-        pipe = pipeline("image-to-text", model="naver-clova-ix/donut-base")
-        print("Model loaded successfully")
-        return pipe
-    except Exception as e:
-        print(f"Error loading model: {str(e)}")
-        raise
-def extract_text_from_image(image_path, model):
-    """Extract text from an image using the loaded model.
-    Args:
-        image_path (str): Path to the image file
-        model: The loaded image-to-text pipeline
-    Returns:
-        str: Extracted text from the image
-    """
-    try:
-        # Check if the file exists
-        if not os.path.exists(image_path):
-            raise FileNotFoundError(f"Image file not found: {image_path}")
-        # Open and process the image
-        image = Image.open(image_path)
-        # Extract text using the model
-        result = model(image)
-        # Get the generated text from the result
-        if result and len(result) > 0:
-            return result[0]['generated_text']
-        else:
-            return "No text detected in the image"
-    except Exception as e:
-        print(f"Error processing image: {str(e)}")
-        return f"Error: {str(e)}"
-def process_directory(directory_path, model, output_file=None):
-    """Process all images in a directory.
-    Args:
-        directory_path (str): Path to directory containing images
-        model: The loaded image-to-text pipeline
-        output_file (str, optional): Path to save results to a text file
-    """
-    results = {}
-    # Check if the directory exists
-    if not os.path.exists(directory_path):
-        print(f"Directory not found: {directory_path}")
-        return
-    # Process each file in the directory
-    for filename in os.listdir(directory_path):
-        # Check if the file is an image
-        if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.webp')):
-            image_path = os.path.join(directory_path, filename)
-            print(f"Processing {filename}...")
-            # Extract text from the image
-            text = extract_text_from_image(image_path, model)
-            results[filename] = text
-            print(f"Result for {filename}: {text}")
-    # Save results to a file if output_file is specified
-    if output_file and results:
-        with open(output_file, 'w', encoding='utf-8') as f:
-            for filename, text in results.items():
-                f.write(f"File: {filename}\n")
-                f.write(f"Text: {text}\n")
-                f.write("-" * 50 + "\n")
-        print(f"Results saved to {output_file}")
-    return results
-def main():
-    # Parse command line arguments
-    parser = argparse.ArgumentParser(description='Extract text from images using Donut model')
-    parser.add_argument('--image', help='Path to an image file')
-    parser.add_argument('--dir', help='Path to a directory containing images')
-    parser.add_argument('--output', help='Path to save output to a text file')
-    args = parser.parse_args()
-    # Load the model
-    model = load_model()
-    # Process a single image or a directory of images
-    if args.image:
-        # Process a single image
-        text = extract_text_from_image(args.image, model)
-        print(f"Extracted text: {text}")
-        # Save to file if output is specified
-        if args.output:
-            with open(args.output, 'w', encoding='utf-8') as f:
-                f.write(f"File: {os.path.basename(args.image)}\n")
-                f.write(f"Text: {text}\n")
-            print(f"Result saved to {args.output}")
-    elif args.dir:
-        # Process a directory of images
-        process_directory(args.dir, model, args.output)
-    else:
-        print("Please provide either --image or --dir argument")
-if __name__ == "__main__":
-    main()

+import streamlit as st
 from transformers import pipeline
+from PIL import Image
+import io
+from gtts import gTTS
+import time
+# Page configuration: set the page title passed to Streamlit
+st.set_page_config(page_title="Kids Story Generator")
+# Visible heading, followed by a one-line prompt telling the user what to do
+st.title("Kids Story Generator")
+st.write("Upload a picture and let's create a magical story!")
+# Initialize models
+@st.cache_resource
+def load_models():
+    image_to_text = pipeline("image-to-text", model="microsoft/git-base-coco")
+    story_generator = pipeline("text-generation", model="gpt2")
+    return image_to_text, story_generator
+image_to_text, story_generator = load_models()
+# Function to generate caption from image
+def generate_caption(image):
+    caption = image_to_text(image)[0]['generated_text']
+    return caption
+# Function to generate story from caption
+def generate_story(caption):
+    prompt = f"Once upon a time, {caption} "
+    story = story_generator(prompt, max_length=200, do_sample=True)[0]['generated_text']
+    # Ensure the story is at least 100 words
+    while len(story.split()) < 100:
+        additional_text = story_generator(story, max_length=100, do_sample=True)[0]['generated_text']
+        story += additional_text
+    return story
+# Function to convert text to speech
+def text_to_speech(text):
+    tts = gTTS(text=text, lang='en', slow=False)
+    audio_file = "story_audio.mp3"
+    tts.save(audio_file)
+    return audio_file
+# Main UI flow: upload an image, then caption -> story -> audio on button press.
+# The uploader only accepts jpg / jpeg / png files.
+uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
+if uploaded_file is not None:
+    # Open the upload with PIL and show it back to the user
+    image = Image.open(uploaded_file)
+    st.image(image, caption='Uploaded Image', use_column_width=True)
+    # Nothing below runs unless the "Generate Story" button is pressed
+    if st.button("Generate Story"):
+        # All three steps run inside the spinner context
+        with st.spinner("Generating your story..."):
+            # Step 1: describe the image and show the caption
+            caption = generate_caption(image)
+            st.write("Image caption:", caption)
+            # Step 2: turn the caption into a story and show it under a heading
+            story = generate_story(caption)
+            st.write("### Your Story")
+            st.write(story)
+            # Step 3: synthesize the story; text_to_speech returns the audio file to play
+            audio_file = text_to_speech(story)
+            # Audio player for the synthesized story
+            st.write("### Listen to your story")
+            st.audio(audio_file)
+# Footer: rendered on every run, whether or not a file was uploaded
+st.markdown("---")
+st.write("Created for ISOM5240 Assignment")