AlBaraa63 committed on
Commit
d5841ad
·
0 Parent(s):

Simple clean UI version

Browse files
Files changed (10) hide show
  1. README.md +118 -0
  2. app.py +143 -0
  3. inputs/test1.png +0 -0
  4. inputs/test2.png +0 -0
  5. main.py +82 -0
  6. outputs/test1.txt +11 -0
  7. outputs/test2.txt +5 -0
  8. packages.txt +2 -0
  9. preprocessing.py +193 -0
  10. requirements.txt +5 -0
README.md ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Text Detection Demo
3
+ emoji: 📝
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: gradio
7
+ sdk_version: 5.49.1
8
+ app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ ---
12
+
13
+ # 📝 Text Detection Demo
14
+
15
+ Extract text from any image using OCR (Optical Character Recognition).
16
+
17
+ ## 🎯 What It Does
18
+
19
+ Upload an image → AI extracts the text → Copy and use!
20
+
21
+ ## 🚀 Try it Live
22
+
23
+ **Demo:** https://huggingface.co/spaces/AlBaraa63/text_detection
24
+
25
+ ## 📁 Files
26
+
27
+ ```
28
+ text_detection/
29
+ ├── app.py # Gradio web demo
30
+ ├── main.py # CLI version
31
+ ├── preprocessing.py # Image processing helpers
32
+ ├── requirements.txt # Dependencies
33
+ ├── packages.txt # System dependencies
34
+ └── README.md # This file
35
+ ```
36
+
37
+ ## 🛠️ Setup
38
+
39
+ ### 1. Install Tesseract OCR
40
+ - **Windows:** Download from [here](https://github.com/UB-Mannheim/tesseract/wiki)
41
+ - Install to: `C:\Program Files\Tesseract-OCR`
42
+
43
+ ### 2. Install Python Packages
44
+ ```bash
45
+ pip install -r requirements.txt
46
+ ```
47
+
48
+ Or manually:
49
+ ```bash
50
+ pip install gradio opencv-python-headless pytesseract numpy pillow
51
+ ```
52
+
53
+ ### 3. Test Installation
54
+ ```bash
55
+ python -c "import pytesseract; print(pytesseract.get_tesseract_version())"
56
+ ```
57
+
58
+ ## 🚀 Usage
59
+
60
+ ### Simple - Run and Enter Path
61
+ ```bash
62
+ python main.py
63
+ ```
64
+ Then enter your image path when asked.
65
+
66
+ ### Example
67
+ ```bash
68
+ python main.py
69
+ # Enter: inputs/image.png
70
+ ```
71
+
72
+ ## 📝 Example
73
+
74
+ **Input Image:** Screenshot with text
75
+ **Output:** Text file with detected text
76
+
77
+ ```
78
+ Image: image.png
79
+ Size: 869 x 296 pixels
80
+
81
+ DETECTED TEXT:
82
+ Mix - antent - homesick (super slowed)
83
+ Mixes are playlists YouTube makes for you
84
+
85
+ ✅ Text saved to: outputs/image.txt
86
+ ```
87
+
88
+ ## 🎓 How It Works
89
+
90
+ 1. **Load Image** - Read the image file
91
+ 2. **Preprocess** - Convert to grayscale and enhance
92
+ 3. **OCR** - Extract text using Tesseract
93
+ 4. **Save** - Write text to `outputs/<image name>.txt`
94
+
95
+ ## 📊 What's Included
96
+
97
+ - **2 sample images** in `inputs/` folder for testing
98
+ - Works with any image format (PNG, JPG, etc.)
99
+ - Clean and minimal - perfect for learning!
100
+
101
+ ## 💡 Tips
102
+
103
+ - Works best with clear, high-contrast images
104
+ - Screenshots work great
105
+ - Photos might need better lighting
106
+ - Larger images = better accuracy
107
+
108
+ ## ⏭️ Next Steps
109
+
110
+ Once you understand this basic version, you can:
111
+ - Add preprocessing options
112
+ - Batch process multiple images
113
+ - Add confidence scores
114
+ - Try different languages
115
+
116
+ ---
117
+
118
+ *Simple text detection for learning* 🎓
app.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Text Detection Demo with Gradio
3
+ Extract text from images using OCR
4
+ """
5
+ import gradio as gr
6
+ import cv2
7
+ import pytesseract
8
+ import numpy as np
9
+ from PIL import Image
10
+ import os
11
+
12
# Point pytesseract at the default Windows install location, but only when
# that executable is actually present; otherwise pytesseract's own default
# command is left untouched (e.g. in cloud deployment).
_WINDOWS_TESSERACT = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
if os.path.exists(_WINDOWS_TESSERACT):
    pytesseract.pytesseract.tesseract_cmd = _WINDOWS_TESSERACT
15
+
16
+
17
def extract_text_from_image(image):
    """
    Extract text from an uploaded image.

    Args:
        image: PIL Image or numpy array in RGB channel order. Grayscale
            (2-D or single-channel), RGB and RGBA inputs are accepted.
            ``None`` (nothing uploaded) yields a friendly message.

    Returns:
        tuple: (processed_image, extracted_text). On success
        ``processed_image`` is the Otsu-thresholded image as an RGB array.
        On failure the input image (as received, or as already converted
        to an array) is returned together with an error message.
    """
    # Clicking the button without uploading anything passes None.
    if image is None:
        return None, "⚠️ Please upload an image first."

    try:
        if isinstance(image, Image.Image):
            # Normalise palette / grayscale / alpha modes to plain RGB so
            # the array below always has a predictable layout.
            image = np.array(image.convert("RGB"))

        # Reduce to a single grayscale channel, whatever the input layout.
        if image.ndim == 2:
            gray = image
        elif image.shape[2] == 1:
            gray = np.ascontiguousarray(image[:, :, 0])
        elif image.shape[2] == 4:
            gray = cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)
        else:
            gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

        # Apply Otsu thresholding for better OCR
        _, threshold = cv2.threshold(
            gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
        )

        # Extract text using Tesseract and trim surrounding whitespace
        text = pytesseract.image_to_string(threshold).strip()

        if not text:
            text = "⚠️ No text detected in the image.\n\nTips:\n- Make sure the image contains clear text\n- Try an image with higher resolution\n- Ensure good contrast between text and background"

        # Convert processed image back to RGB for display
        processed_display = cv2.cvtColor(threshold, cv2.COLOR_GRAY2RGB)

        return processed_display, text

    except Exception as e:
        # UI boundary: report the failure in the text box instead of crashing.
        error_msg = f"❌ Error processing image: {str(e)}\n\nPlease try another image."
        return image, error_msg
59
+
60
+
61
# Create Gradio interface
with gr.Blocks(theme=gr.themes.Soft(), title="Text Detection Demo") as demo:

    gr.Markdown(
        """
        # 📝 Text Detection Demo
        ### Extract text from any image using OCR

        Upload an image containing text, and the AI will extract all readable text from it.
        Perfect for documents, screenshots, photos of signs, and more!
        """
    )

    with gr.Row():
        # Left column: upload, action button and usage tips
        with gr.Column():
            input_image = gr.Image(
                label="Upload Image",
                type="pil",
                height=400
            )

            extract_btn = gr.Button(
                "🔍 Extract Text",
                variant="primary",
                size="lg"
            )

            gr.Markdown(
                """
                ### 💡 Tips for best results:
                - Use clear, high-resolution images
                - Ensure good lighting and contrast
                - Avoid blurry or distorted text
                - Works with printed and digital text
                """
            )

        # Right column: thresholded preview and extracted text
        with gr.Column():
            output_image = gr.Image(
                label="Processed Image (Thresholded)",
                height=400
            )

            output_text = gr.Textbox(
                label="Extracted Text",
                lines=10,
                placeholder="Extracted text will appear here...",
                show_copy_button=True
            )

    # Example images section. The repository ships PNG samples; only files
    # that actually exist are offered, so the examples list never contains
    # placeholder None entries.
    example_rows = [
        [path]
        for path in ("inputs/test1.png", "inputs/test2.png")
        if os.path.exists(path)
    ]
    if example_rows:
        gr.Markdown("### 📸 Try these examples:")
        gr.Examples(
            examples=example_rows,
            inputs=input_image,
            label="Sample Images"
        )

    # Connect the button to the function
    extract_btn.click(
        fn=extract_text_from_image,
        inputs=input_image,
        outputs=[output_image, output_text]
    )

    # Footer
    gr.Markdown(
        """
        ---
        Made with ❤️ using Gradio and Tesseract OCR
        """
    )
136
+
137
# Start the web server only when this file is executed as a script
if __name__ == "__main__":
    launch_options = {
        "share": True,             # Creates a public shareable link
        "server_name": "0.0.0.0",  # Allow external connections
        "server_port": 7860,
    }
    demo.launch(**launch_options)
inputs/test1.png ADDED
inputs/test2.png ADDED
main.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Simple Text Detection - Extract text from any image
3
+ Just run: python main.py
4
+ """
5
+ import cv2
6
+ import pytesseract
7
+ import os
8
+
9
# Set Tesseract path (Windows). Only override pytesseract's default command
# when the Windows executable really exists, so the script also runs on
# systems where Tesseract is installed elsewhere.
_WINDOWS_TESSERACT = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
if os.path.exists(_WINDOWS_TESSERACT):
    pytesseract.pytesseract.tesseract_cmd = _WINDOWS_TESSERACT
11
+
12
+
13
def extract_text(image_path):
    """Run OCR on the image at ``image_path``.

    Prints the detected text, saves it to ``outputs/<image name>.txt`` and
    returns it. Returns ``None`` when the image cannot be read or when no
    text is detected.
    """
    picture = cv2.imread(image_path)
    if picture is None:
        print(f"Could not read image: {image_path}")
        print("Make sure the file path is correct")
        return None

    # Grayscale + Otsu threshold to make the text clearer for Tesseract
    grayscale = cv2.cvtColor(picture, cv2.COLOR_BGR2GRAY)
    binary = cv2.threshold(
        grayscale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )[1]

    print("\nExtracting text...")
    detected = pytesseract.image_to_string(binary).strip()

    if not detected:
        print("\nNo text detected in the image")
        return None

    separator = "="*30
    print("\nDETECTED TEXT:")
    print(separator)
    print(detected)
    print(separator)

    # Save next to the other results, named after the source image
    os.makedirs("outputs", exist_ok=True)
    stem, _extension = os.path.splitext(os.path.basename(image_path))
    destination = os.path.join("outputs", f"{stem}.txt")
    with open(destination, 'w', encoding='utf-8') as handle:
        handle.write(detected)
    print(f"\nText saved to: {destination}")

    return detected
59
+
60
+
61
def main():
    """Prompt for an image path and run text extraction on it."""
    print("\nSimple Text Detection Tool")

    typed = input("\nEnter img path: ")

    # Trim whitespace, then any quotes left over from a copied path
    image_path = typed.strip().strip('"').strip("'")

    if os.path.exists(image_path):
        extract_text(image_path)
        return

    print(f"\nFile not found: {image_path}")
    print(" Please check the path and try again")


if __name__ == "__main__":
    main()
outputs/test1.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Tesseract installer for Windows
2
+
3
+ Normally we run Tesseract on Debian GNU Linux, but there was also the need for a Windows version.
4
+ That's why we have built a Tesseract installer for Windows.
5
+
6
+ WARNING: Tesseract should be either installed in the directory which is suggested during the
7
+ installation or in a new directory. The uninstaller removes the whole installation directory. If you
8
+ installed Tesseract in an existing directory, that directory will be removed with all its subdirectories
9
+ and files.
10
+
11
+ The latest installers can be downloaded here:
outputs/test2.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Tt was the best of
2
+ times, it was the worst
3
+ of times, it was the age
4
+ of wisdom, it was the
5
+ age of foolishness...
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ tesseract-ocr
2
+ tesseract-ocr-eng
preprocessing.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Preprocessing functions to improve OCR accuracy
3
+ Includes various image enhancement techniques
4
+ """
5
+ import cv2
6
+ import numpy as np
7
+
8
+
9
def convert_to_grayscale(img):
    """Convert image to grayscale.

    Args:
        img: numpy image array. Arrays that are not 3-D are returned
            unchanged; 3-D arrays are converted with OpenCV's BGR-to-gray
            conversion, except single-channel ``(H, W, 1)`` arrays, whose
            only channel is returned directly.

    Returns:
        The grayscale image.
    """
    if len(img.shape) != 3:
        return img

    if img.shape[2] == 1:
        # Already grayscale; the BGR2GRAY conversion rejects 1-channel input.
        return img[:, :, 0]

    return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
14
+
15
+
16
def apply_thresholding(img, method='otsu'):
    """
    Threshold an image after converting it to grayscale.

    Methods:
    - 'otsu': Otsu's automatic threshold selection
    - 'adaptive': Gaussian adaptive thresholding (block size 11, C = 2),
      good for varying lighting
    - 'binary': fixed threshold at 127

    Any other method name returns the grayscale image without thresholding.
    """
    grayscale = convert_to_grayscale(img)

    if method == 'binary':
        return cv2.threshold(grayscale, 127, 255, cv2.THRESH_BINARY)[1]

    if method == 'adaptive':
        return cv2.adaptiveThreshold(
            grayscale,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,
            11,
            2,
        )

    if method == 'otsu':
        otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
        return cv2.threshold(grayscale, 0, 255, otsu_flags)[1]

    return grayscale
46
+
47
+
48
def remove_noise(img, method='median'):
    """
    Smooth an image to reduce noise.

    Methods:
    - 'median': 3x3 median blur (good for salt-and-pepper noise)
    - 'gaussian': 5x5 Gaussian blur (general smoothing)
    - 'bilateral': bilateral filter (preserves edges)

    Any other method name returns the image unchanged.
    """
    if method == 'bilateral':
        cleaned = cv2.bilateralFilter(img, 9, 75, 75)
    elif method == 'gaussian':
        cleaned = cv2.GaussianBlur(img, (5, 5), 0)
    elif method == 'median':
        cleaned = cv2.medianBlur(img, 3)
    else:
        cleaned = img

    return cleaned
67
+
68
+
69
def dilate_text(img, kernel_size=(1, 1)):
    """Thicken text by dilating with an all-ones kernel of ``kernel_size``.

    One dilation pass is applied. Note that the default 1x1 kernel leaves
    the image unchanged; pass a larger size to actually thicken strokes.
    """
    structuring_element = np.ones(kernel_size, dtype=np.uint8)
    thickened = cv2.dilate(img, structuring_element, iterations=1)
    return thickened
73
+
74
+
75
def erode_text(img, kernel_size=(1, 1)):
    """Thin text by eroding with an all-ones kernel of ``kernel_size``.

    One erosion pass is applied. Note that the default 1x1 kernel leaves
    the image unchanged; pass a larger size to actually thin strokes.
    """
    structuring_element = np.ones(kernel_size, dtype=np.uint8)
    thinned = cv2.erode(img, structuring_element, iterations=1)
    return thinned
79
+
80
+
81
def invert_image(img):
    """Return the bitwise inverse of the image.

    Useful when the text is white on a black background.
    """
    inverted = cv2.bitwise_not(img)
    return inverted
84
+
85
+
86
def enhance_contrast(img):
    """Boost contrast with CLAHE (clip limit 2.0, 8x8 tiles).

    The image is converted to grayscale first; the equalised grayscale
    image is returned.
    """
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    return equalizer.apply(convert_to_grayscale(img))
91
+
92
+
93
def resize_image(img, scale=2.0):
    """
    Resize image for better OCR.

    Larger images often work better with Tesseract.

    Args:
        img: numpy image array (height and width are read from its shape).
        scale: positive factor applied to both width and height.

    Returns:
        The image resized with bicubic interpolation.

    Raises:
        ValueError: if ``scale`` is not positive.
    """
    if scale <= 0:
        raise ValueError(f"scale must be positive, got {scale!r}")

    height, width = img.shape[:2]
    # Clamp to one pixel so very small scales never request an empty image.
    new_width = max(1, int(width * scale))
    new_height = max(1, int(height * scale))
    return cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
102
+
103
+
104
def add_border(img, border_size=10, color=255):
    """Pad the image on all four sides with a constant-colour border.

    With the default ``color`` of 255 the border is white.
    """
    top = bottom = left = right = border_size
    return cv2.copyMakeBorder(
        img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color
    )
110
+
111
+
112
def preprocess_pipeline(img, config='default'):
    """
    Complete preprocessing pipeline.

    Configs:
    - 'default': grayscale, median denoise, Otsu threshold, 10px border
    - 'aggressive': grayscale, CLAHE contrast, bilateral denoise,
      adaptive threshold, 2x2 dilation, 15px border
    - 'light': grayscale and Otsu threshold only
    - 'upscale': 3x upscale, grayscale, median denoise, Otsu threshold,
      20px border

    Any other config name applies no preprocessing and returns ``img``
    unchanged.
    """
    if config == 'default':
        # Standard pipeline
        processed = convert_to_grayscale(img)
        processed = remove_noise(processed, 'median')
        processed = apply_thresholding(processed, 'otsu')
        processed = add_border(processed, 10)

    elif config == 'aggressive':
        # Aggressive preprocessing
        processed = convert_to_grayscale(img)
        processed = enhance_contrast(processed)
        processed = remove_noise(processed, 'bilateral')
        processed = apply_thresholding(processed, 'adaptive')
        processed = dilate_text(processed, (2, 2))
        processed = add_border(processed, 15)

    elif config == 'light':
        # Light preprocessing
        processed = convert_to_grayscale(img)
        processed = apply_thresholding(processed, 'otsu')

    elif config == 'upscale':
        # Upscale first, then process
        processed = resize_image(img, scale=3.0)
        processed = convert_to_grayscale(processed)
        processed = remove_noise(processed, 'median')
        processed = apply_thresholding(processed, 'otsu')
        processed = add_border(processed, 20)

    else:
        # Unknown config: no preprocessing
        processed = img

    return processed
156
+
157
+
158
def preprocess_for_ocr(img, show_steps=False):
    """
    Optimized preprocessing for OCR.

    Stages: grayscale, 2.5x upscale, bilateral denoise, Otsu threshold,
    20px border.

    Returns the final image, or ``(final_image, steps)`` when
    ``show_steps`` is true, where ``steps`` maps a stage label to a copy
    of that stage's output.
    """
    snapshots = {}

    def record(label, stage):
        # Keep a copy of each intermediate result only when requested
        if show_steps:
            snapshots[label] = stage.copy()
        return stage

    # Step 1: Convert to grayscale
    gray = record('1_grayscale', convert_to_grayscale(img))

    # Step 2: Upscale image (Tesseract works better with larger images)
    upscaled = record('2_upscaled', resize_image(gray, scale=2.5))

    # Step 3: Remove noise
    denoised = record('3_denoised', remove_noise(upscaled, 'bilateral'))

    # Step 4: Apply thresholding
    thresh = record('4_threshold', apply_thresholding(denoised, 'otsu'))

    # Step 5: Add border
    bordered = record('5_bordered', add_border(thresh, 20))

    return (bordered, snapshots) if show_steps else bordered
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ opencv-python-headless
3
+ pytesseract
4
+ numpy
5
+ pillow