CR7CAD committed on
Commit
5b9e396
·
verified ·
1 Parent(s): 8525085

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -28
app.py CHANGED
@@ -1,36 +1,119 @@
1
- import streamlit as st
2
- from transformers import pipeline
3
  from PIL import Image
 
4
 
5
- # Set the title of the app
6
- st.title("Image-to-Text Converter using Donut")
7
-
8
- # Description of the app
9
- st.write("Upload an image to extract text using the Donut model (naver-clova-ix/donut-base).")
10
-
11
- # Create a file uploader for image files
12
- uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
13
-
14
- # Initialize the pipeline
15
- @st.cache_resource(show_spinner=False)
16
- def load_pipeline():
17
- return pipeline("image-to-text", model="naver-clova-ix/donut-base")
18
-
19
- pipe = load_pipeline()
20
 
21
- if uploaded_file is not None:
 
 
 
 
 
 
 
 
 
22
  try:
23
- # Open the image file and convert to RGB (if necessary)
24
- image = Image.open(uploaded_file).convert("RGB")
25
- st.image(image, caption="Uploaded Image", use_column_width=True)
26
 
27
- # Process the image through the pipeline
28
- result = pipe(image)
29
 
30
- # Extract generated text from the result list
31
- generated_text = result[0].get("generated_text", "No text generated.")
32
 
33
- st.subheader("Extracted Text")
34
- st.text_area("Result", generated_text, height=200)
 
 
 
 
35
  except Exception as e:
36
- st.error(f"An error occurred: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import argparse
3
  from PIL import Image
4
+ from transformers import pipeline
5
 
6
def load_model(model_name="naver-clova-ix/donut-base"):
    """Load the image-to-text pipeline.

    Args:
        model_name (str): Model identifier passed to ``pipeline`` as its
            ``model`` argument. Defaults to the Donut base checkpoint that
            was previously hard-coded here.

    Returns:
        The object returned by ``pipeline("image-to-text", ...)``.

    Raises:
        Exception: Whatever ``pipeline`` raises; the error is reported on
            stdout and then re-raised unchanged.
    """
    print("Loading image-to-text model...")
    try:
        # Only the call that can actually fail is guarded.
        pipe = pipeline("image-to-text", model=model_name)
    except Exception as e:
        print(f"Error loading model: {e}")
        raise
    print("Model loaded successfully")
    return pipe
 
 
 
 
 
16
 
17
def extract_text_from_image(image_path, model):
    """Extract text from an image using the loaded model.

    Args:
        image_path (str): Path to the image file
        model: The loaded image-to-text pipeline; called with a PIL image

    Returns:
        str: The ``generated_text`` of the first result, the literal
            "No text detected in the image" when the model returns nothing,
            or "Error: <message>" when any step fails. Errors are reported
            in-band (and printed) rather than raised.
    """
    try:
        # isfile() rather than exists(): a directory is not a readable image.
        if not os.path.isfile(image_path):
            raise FileNotFoundError(f"Image file not found: {image_path}")

        # The context manager releases the file handle; convert() returns an
        # independent, fully loaded copy, so closing the source is safe.
        # Converting also hands palette, greyscale, and alpha images to the
        # model as 3-channel RGB.
        with Image.open(image_path) as source:
            image = source.convert("RGB")

        # Extract text using the model
        result = model(image)

        # Get the generated text from the result
        if result:
            return result[0]['generated_text']
        return "No text detected in the image"

    except Exception as e:
        print(f"Error processing image: {e}")
        return f"Error: {e}"
47
+
48
def process_directory(directory_path, model, output_file=None):
    """Process all images in a directory.

    Files are selected by extension (case-insensitive) and handled in sorted
    name order so console output and the report file are reproducible.

    Args:
        directory_path (str): Path to directory containing images
        model: The loaded image-to-text pipeline
        output_file (str, optional): Path to save results to a text file;
            written only when at least one image was processed

    Returns:
        dict | None: Mapping of file name to extracted text (possibly
            empty), or ``None`` when ``directory_path`` is not a directory.
    """
    # isdir() rather than exists(): a regular file would pass exists() and
    # then make os.listdir() raise NotADirectoryError.
    if not os.path.isdir(directory_path):
        print(f"Directory not found: {directory_path}")
        return None

    image_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.webp')
    results = {}

    # os.listdir() order is arbitrary; sort for deterministic output.
    for filename in sorted(os.listdir(directory_path)):
        if not filename.lower().endswith(image_suffixes):
            continue

        image_path = os.path.join(directory_path, filename)
        print(f"Processing {filename}...")

        # Extract text from the image
        text = extract_text_from_image(image_path, model)
        results[filename] = text

        print(f"Result for {filename}: {text}")

    # Save results to a file if output_file is specified
    if output_file and results:
        with open(output_file, 'w', encoding='utf-8') as f:
            for filename, text in results.items():
                f.write(f"File: {filename}\n")
                f.write(f"Text: {text}\n")
                f.write("-" * 50 + "\n")
        print(f"Results saved to {output_file}")

    return results
86
+
87
def main(argv=None):
    """Command-line entry point: extract text from one image or a directory.

    Arguments are validated before the model is loaded, so a usage mistake
    fails immediately instead of after the model load.

    Args:
        argv (list[str], optional): Arguments to parse. ``None`` (the
            default) lets argparse read ``sys.argv[1:]``.

    Raises:
        SystemExit: Raised by argparse (status 2) when neither ``--image``
            nor ``--dir`` is given, or when both are given.
    """
    # Parse command line arguments
    parser = argparse.ArgumentParser(description='Extract text from images using Donut model')
    # The two input modes cannot be combined; previously --dir was silently
    # ignored whenever --image was also present.
    source = parser.add_mutually_exclusive_group()
    source.add_argument('--image', help='Path to an image file')
    source.add_argument('--dir', help='Path to a directory containing images')
    parser.add_argument('--output', help='Path to save output to a text file')

    args = parser.parse_args(argv)

    if not (args.image or args.dir):
        # parser.error() prints usage plus this message and exits with 2.
        parser.error("Please provide either --image or --dir argument")

    # Load the model only once the arguments are known to be usable
    model = load_model()

    if args.image:
        # Process a single image
        text = extract_text_from_image(args.image, model)
        print(f"Extracted text: {text}")

        # Save to file if output is specified
        if args.output:
            with open(args.output, 'w', encoding='utf-8') as f:
                f.write(f"File: {os.path.basename(args.image)}\n")
                f.write(f"Text: {text}\n")
            print(f"Result saved to {args.output}")
    else:
        # Process a directory of images
        process_directory(args.dir, model, args.output)


if __name__ == "__main__":
    main()