CR7CAD committed on
Commit
90bef38
·
verified ·
1 Parent(s): 7704b1e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -110
app.py CHANGED
@@ -1,119 +1,74 @@
1
- import os
2
- import argparse
3
- from PIL import Image
4
  from transformers import pipeline
 
 
 
 
5
 
6
- def load_model():
7
- """Load the image-to-text model."""
8
- print("Loading image-to-text model...")
9
- try:
10
- pipe = pipeline("image-to-text", model="naver-clova-ix/donut-base")
11
- print("Model loaded successfully")
12
- return pipe
13
- except Exception as e:
14
- print(f"Error loading model: {str(e)}")
15
- raise
16
 
17
- def extract_text_from_image(image_path, model):
18
- """Extract text from an image using the loaded model.
19
-
20
- Args:
21
- image_path (str): Path to the image file
22
- model: The loaded image-to-text pipeline
23
-
24
- Returns:
25
- str: Extracted text from the image
26
- """
27
- try:
28
- # Check if the file exists
29
- if not os.path.exists(image_path):
30
- raise FileNotFoundError(f"Image file not found: {image_path}")
31
-
32
- # Open and process the image
33
- image = Image.open(image_path)
34
-
35
- # Extract text using the model
36
- result = model(image)
37
-
38
- # Get the generated text from the result
39
- if result and len(result) > 0:
40
- return result[0]['generated_text']
41
- else:
42
- return "No text detected in the image"
43
-
44
- except Exception as e:
45
- print(f"Error processing image: {str(e)}")
46
- return f"Error: {str(e)}"
47
 
48
- def process_directory(directory_path, model, output_file=None):
49
- """Process all images in a directory.
50
-
51
- Args:
52
- directory_path (str): Path to directory containing images
53
- model: The loaded image-to-text pipeline
54
- output_file (str, optional): Path to save results to a text file
55
- """
56
- results = {}
57
-
58
- # Check if the directory exists
59
- if not os.path.exists(directory_path):
60
- print(f"Directory not found: {directory_path}")
61
- return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
- # Process each file in the directory
64
- for filename in os.listdir(directory_path):
65
- # Check if the file is an image
66
- if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.webp')):
67
- image_path = os.path.join(directory_path, filename)
68
- print(f"Processing {filename}...")
69
 
70
- # Extract text from the image
71
- text = extract_text_from_image(image_path, model)
72
- results[filename] = text
 
73
 
74
- print(f"Result for {filename}: {text}")
75
-
76
- # Save results to a file if output_file is specified
77
- if output_file and results:
78
- with open(output_file, 'w', encoding='utf-8') as f:
79
- for filename, text in results.items():
80
- f.write(f"File: {filename}\n")
81
- f.write(f"Text: {text}\n")
82
- f.write("-" * 50 + "\n")
83
- print(f"Results saved to {output_file}")
84
-
85
- return results
86
-
87
- def main():
88
- # Parse command line arguments
89
- parser = argparse.ArgumentParser(description='Extract text from images using Donut model')
90
- parser.add_argument('--image', help='Path to an image file')
91
- parser.add_argument('--dir', help='Path to a directory containing images')
92
- parser.add_argument('--output', help='Path to save output to a text file')
93
-
94
- args = parser.parse_args()
95
-
96
- # Load the model
97
- model = load_model()
98
-
99
- # Process a single image or a directory of images
100
- if args.image:
101
- # Process a single image
102
- text = extract_text_from_image(args.image, model)
103
- print(f"Extracted text: {text}")
104
-
105
- # Save to file if output is specified
106
- if args.output:
107
- with open(args.output, 'w', encoding='utf-8') as f:
108
- f.write(f"File: {os.path.basename(args.image)}\n")
109
- f.write(f"Text: {text}\n")
110
- print(f"Result saved to {args.output}")
111
 
112
- elif args.dir:
113
- # Process a directory of images
114
- process_directory(args.dir, model, args.output)
115
- else:
116
- print("Please provide either --image or --dir argument")
117
 
118
- if __name__ == "__main__":
119
- main()
 
1
+ import streamlit as st
 
 
2
  from transformers import pipeline
3
+ from PIL import Image
4
+ import io
5
+ from gtts import gTTS
6
+ import time
7
 
8
+ # Set page title
9
+ st.set_page_config(page_title="Kids Story Generator")
 
 
 
 
 
 
 
 
10
 
11
+ # Title and introduction
12
+ st.title("Kids Story Generator")
13
+ st.write("Upload a picture and let's create a magical story!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
+ # Initialize models
16
+ @st.cache_resource
17
+ def load_models():
18
+ image_to_text = pipeline("image-to-text", model="microsoft/git-base-coco")
19
+ story_generator = pipeline("text-generation", model="gpt2")
20
+ return image_to_text, story_generator
21
+
22
+ image_to_text, story_generator = load_models()
23
+
24
+ # Function to generate caption from image
25
+ def generate_caption(image):
26
+ caption = image_to_text(image)[0]['generated_text']
27
+ return caption
28
+
29
+ # Function to generate story from caption
30
+ def generate_story(caption):
31
+ prompt = f"Once upon a time, {caption} "
32
+ story = story_generator(prompt, max_length=200, do_sample=True)[0]['generated_text']
33
+ # Ensure the story is at least 100 words
34
+ while len(story.split()) < 100:
35
+ additional_text = story_generator(story, max_length=100, do_sample=True)[0]['generated_text']
36
+ story += additional_text
37
+ return story
38
+
39
+ # Function to convert text to speech
40
+ def text_to_speech(text):
41
+ tts = gTTS(text=text, lang='en', slow=False)
42
+ audio_file = "story_audio.mp3"
43
+ tts.save(audio_file)
44
+ return audio_file
45
+
46
+ # File uploader
47
+ uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
48
+
49
+ if uploaded_file is not None:
50
+ # Display the uploaded image
51
+ image = Image.open(uploaded_file)
52
+ st.image(image, caption='Uploaded Image', use_column_width=True)
53
 
54
+ # Generate button
55
+ if st.button("Generate Story"):
56
+ with st.spinner("Generating your story..."):
57
+ # Generate caption
58
+ caption = generate_caption(image)
59
+ st.write("Image caption:", caption)
60
 
61
+ # Generate story
62
+ story = generate_story(caption)
63
+ st.write("### Your Story")
64
+ st.write(story)
65
 
66
+ # Generate audio
67
+ audio_file = text_to_speech(story)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
+ # Display audio
70
+ st.write("### Listen to your story")
71
+ st.audio(audio_file)
 
 
72
 
73
+ st.markdown("---")
74
+ st.write("Created for ISOM5240 Assignment")