buildinves committed on
Commit
eebc67e
·
verified ·
1 Parent(s): d8b1429

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +357 -160
app.py CHANGED
@@ -1,190 +1,387 @@
1
- #!/usr/bin/env python3
2
- """
3
- Local testing script for the Subdivision Plan Analyzer
4
- Run this before deploying to Hugging Face Spaces to ensure everything works
5
- """
6
-
7
- import sys
 
 
8
  import os
9
 
10
- def test_imports():
11
- """Test if all required packages can be imported"""
12
- print("Testing imports...")
 
 
 
 
 
 
 
 
 
13
 
14
- try:
15
- import gradio as gr
16
- print("✓ Gradio imported successfully")
17
- except ImportError as e:
18
- print(f"✗ Failed to import Gradio: {e}")
19
- return False
20
 
21
- try:
22
- import pandas as pd
23
- print("✓ Pandas imported successfully")
24
- except ImportError as e:
25
- print(f"✗ Failed to import Pandas: {e}")
26
- return False
27
 
28
- try:
29
- import numpy as np
30
- print("✓ NumPy imported successfully")
31
- except ImportError as e:
32
- print(f"✗ Failed to import NumPy: {e}")
33
- return False
34
 
35
- try:
36
- import PIL
37
- print("✓ PIL imported successfully")
38
- except ImportError as e:
39
- print(f"✗ Failed to import PIL: {e}")
40
- return False
 
 
 
 
 
41
 
42
- try:
43
- import easyocr
44
- print("✓ EasyOCR imported successfully")
45
- except ImportError as e:
46
- print(f"✗ Failed to import EasyOCR: {e}")
47
- return False
48
 
49
- try:
50
- import cv2
51
- print("✓ OpenCV imported successfully")
52
- except ImportError as e:
53
- print(f"✗ Failed to import OpenCV: {e}")
54
- return False
55
 
56
- try:
57
- import skimage
58
- print("✓ scikit-image imported successfully")
59
- except ImportError as e:
60
- print(f"✗ Failed to import scikit-image: {e}")
61
- return False
62
 
63
- try:
64
- import scipy
65
- print("✓ SciPy imported successfully")
66
- except ImportError as e:
67
- print(f"✗ Failed to import SciPy: {e}")
68
- return False
69
 
70
- try:
71
- import matplotlib
72
- print("✓ Matplotlib imported successfully")
73
- except ImportError as e:
74
- print(f"✗ Failed to import Matplotlib: {e}")
75
- return False
 
 
 
 
 
 
76
 
77
- return True
78
 
79
- def test_easyocr_init():
80
- """Test if EasyOCR can be initialized"""
81
- print("\nTesting EasyOCR initialization...")
82
 
83
- try:
84
- import easyocr
85
- reader = easyocr.Reader(['en'])
86
- print("✓ EasyOCR reader initialized successfully")
87
- print(" Note: First run will download models (~64MB)")
88
- return True
89
- except Exception as e:
90
- print(f"✗ Failed to initialize EasyOCR: {e}")
91
- return False
92
-
93
- def test_app_import():
94
- """Test if the main app can be imported"""
95
- print("\nTesting app.py import...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
- try:
98
- import app
99
- print("✓ app.py imported successfully")
100
- return True
101
- except Exception as e:
102
- print(f"✗ Failed to import app.py: {e}")
103
- print(f" Error: {e}")
104
- return False
105
 
106
- def create_test_image():
107
- """Create a simple test image"""
108
- print("\nCreating test image...")
109
-
110
  try:
111
- from PIL import Image, ImageDraw, ImageFont
 
 
 
 
 
112
 
113
- # Create a white image
114
- img = Image.new('RGB', (800, 600), color='white')
115
- draw = ImageDraw.Draw(img)
 
116
 
117
- # Draw some lot boundaries
118
- draw.rectangle([100, 100, 300, 250], outline='black', width=2)
119
- draw.rectangle([300, 100, 500, 250], outline='black', width=2)
120
- draw.rectangle([100, 250, 300, 400], outline='black', width=2)
121
- draw.rectangle([300, 250, 500, 400], outline='black', width=2)
122
 
123
- # Add lot numbers and areas
 
 
 
 
 
 
 
 
 
 
 
 
124
  try:
125
- font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 20)
126
  except:
127
  font = ImageFont.load_default()
128
 
129
- draw.text((150, 120), "Lot 692", fill='black', font=font)
130
- draw.text((150, 160), "234m²", fill='black', font=font)
131
- draw.text((150, 180), "15.6", fill='black', font=font)
132
- draw.text((150, 200), "15.0", fill='black', font=font)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
 
134
- draw.text((350, 120), "Lot 690", fill='black', font=font)
135
- draw.text((350, 160), "117m²", fill='black', font=font)
136
- draw.text((350, 180), "7.8", fill='black', font=font)
137
- draw.text((350, 200), "15.0", fill='black', font=font)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
- img.save('test_subdivision.png')
140
- print("✓ Test image created: test_subdivision.png")
141
- return True
142
  except Exception as e:
143
- print(f" Failed to create test image: {e}")
144
- return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
- def main():
147
- """Run all tests"""
148
- print("=== Subdivision Plan Analyzer - Local Testing ===\n")
149
-
150
- all_passed = True
151
-
152
- # Test imports
153
- if not test_imports():
154
- all_passed = False
155
- print("\n⚠️ Some imports failed. Please install missing packages:")
156
- print(" pip install -r requirements.txt")
157
-
158
- # Test EasyOCR
159
- if not test_easyocr_init():
160
- all_passed = False
161
- print("\n⚠️ EasyOCR initialization failed.")
162
-
163
- # Test app import
164
- if not test_app_import():
165
- all_passed = False
166
- print("\n⚠️ Failed to import app.py. Check for syntax errors.")
167
-
168
- # Create test image
169
- if not create_test_image():
170
- all_passed = False
171
- print("\n⚠️ Failed to create test image.")
172
-
173
- # Summary
174
- print("\n=== Test Summary ===")
175
- if all_passed:
176
- print("✅ All tests passed! Your app is ready for deployment.")
177
- print("\nTo run the app locally:")
178
- print(" python app.py")
179
- print("\nTo deploy to Hugging Face Spaces:")
180
- print(" 1. Create a new Space on huggingface.co")
181
- print(" 2. Upload app.py, requirements.txt, and README.md")
182
- print(" 3. Wait for the build to complete")
183
- else:
184
- print("❌ Some tests failed. Please fix the issues before deployment.")
185
-
186
- return all_passed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
 
 
188
  if __name__ == "__main__":
189
- success = main()
190
- sys.exit(0 if success else 1)
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+ from PIL import Image, ImageDraw, ImageFont
5
+ import easyocr
6
+ import cv2
7
+ import io
8
+ import re
9
+ from scipy.spatial import distance
10
  import os
11
 
12
def _load_ocr_reader():
    """Build the English EasyOCR reader, printing progress before and after."""
    print("Initializing EasyOCR...")
    ocr = easyocr.Reader(['en'])
    print("EasyOCR initialized successfully!")
    return ocr


# Module-level reader, created once at import time and used by the OCR step.
reader = _load_ocr_reader()
16
+
17
def extract_lot_info(text):
    """Extract lot-number, area and dimension candidates from one OCR fragment.

    Args:
        text: Text of a single OCR detection; coerced with ``str()`` and
            stripped before parsing.

    Returns:
        dict with three lists:

        * ``'lot_numbers'`` - digit strings whose value lies in 100..9999,
          taken from a fragment that consists only of digits, optionally
          prefixed with "Lot" (e.g. ``"692"`` or ``"Lot 692"``).
        * ``'areas'`` - numbers followed by ``m²``/``m2``; ``int`` when the
          value is whole, otherwise ``float``.
        * ``'dimensions'`` - floats in 1.0..100.0 found outside the area
          annotations.
    """
    lot_info = {
        'lot_numbers': [],
        'areas': [],
        'dimensions': [],
    }

    text = str(text).strip()

    # Match ASCII digits explicitly: str.isdigit() is also true for characters
    # such as '²', which int() rejects with ValueError.
    lot_match = re.fullmatch(r'(?:lot\s*)?([0-9]+)', text, re.IGNORECASE)
    if lot_match and 100 <= int(lot_match.group(1)) <= 9999:
        lot_info['lot_numbers'].append(lot_match.group(1))

    # The optional fractional part keeps "234.5m²" from being read as "5m²".
    area_pattern = r'(\d+(?:\.\d+)?)\s*m[²2]'
    for match in re.findall(area_pattern, text, re.IGNORECASE):
        value = float(match)
        lot_info['areas'].append(int(value) if value.is_integer() else value)

    # Dimensions are only looked for in short or decimal-bearing fragments.
    dim_pattern = r'\d+\.?\d*'
    if '.' in text or (any(char.isdigit() for char in text) and len(text) < 10):
        # Blank out area annotations first so neither the area value nor the
        # "2" of "m2" is reported as a dimension.
        remainder = re.sub(area_pattern, ' ', text, flags=re.IGNORECASE)
        for dim in re.findall(dim_pattern, remainder):
            val = float(dim)
            if 1.0 <= val <= 100.0:  # Reasonable dimension range
                lot_info['dimensions'].append(val)

    return lot_info
51
+
52
def find_lot_boundaries(image):
    """Detect lot-like outlines using thresholding and contour finding.

    Args:
        image: A PIL image (any mode) or an RGB array accepted by
            ``np.array``.

    Returns:
        list of OpenCV contours whose area exceeds 1000 px² and whose
        polygon approximation has 4 to 6 vertices.
    """
    min_contour_area = 1000  # Minimum area threshold

    # cv2.COLOR_RGB2GRAY needs exactly three channels; RGBA, palette and
    # greyscale PIL images are normalised to RGB first.
    if isinstance(image, Image.Image):
        image = image.convert('RGB')

    # Convert to grayscale
    gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)

    # Apply Gaussian blur to reduce noise
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)

    # Apply adaptive thresholding (inverted, so dark lines become foreground)
    thresh = cv2.adaptiveThreshold(
        blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 11, 2,
    )

    # findContours returns (contours, hierarchy) on OpenCV 4 and
    # (image, contours, hierarchy) on OpenCV 3; [-2] is the contours either way.
    contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Filter contours to find lot-like shapes
    lot_contours = []
    for contour in contours:
        if cv2.contourArea(contour) <= min_contour_area:
            continue

        # Approximate contour to polygon
        epsilon = 0.02 * cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, epsilon, True)

        # Look for rectangular shapes (4-6 vertices)
        if 4 <= len(approx) <= 6:
            lot_contours.append(contour)

    return lot_contours
81
 
82
def associate_text_with_lots(ocr_results, lot_contours, image_shape):
    """Associate OCR text with detected lot boundaries.

    Args:
        ocr_results: Iterable of ``(bbox, text, prob)`` tuples, where ``bbox``
            is a sequence of corner points and indices 0 and 2 are opposite
            corners.
        lot_contours: OpenCV contours, one per candidate lot.
        image_shape: Accepted for interface compatibility; not used.

    Returns:
        list of dicts with keys ``'contour'``, ``'bbox'`` (x, y, w, h),
        ``'lot_number'``, ``'area'`` and ``'dimensions'``. Only lots for which
        a lot number was found are returned. ``'dimensions'`` is ordered from
        the nearest text fragment to the farthest.
    """
    lots = []

    for contour in lot_contours:
        # Get bounding box of contour
        x, y, w, h = cv2.boundingRect(contour)
        lot_center = (x + w / 2, y + h / 2)
        search_radius = max(w, h) * 0.7  # Within 70% of lot size

        lot_data = {
            'contour': contour,
            'bbox': (x, y, w, h),
            'lot_number': None,
            'area': None,
            'dimensions': [],
        }

        # Collect OCR fragments whose centre lies within or near this lot.
        nearby = []
        for bbox, text, _prob in ocr_results:
            text_center = (
                (bbox[0][0] + bbox[2][0]) / 2,
                (bbox[0][1] + bbox[2][1]) / 2,
            )
            dist = distance.euclidean(lot_center, text_center)
            if dist < search_radius:
                nearby.append((dist, text))

        # The search radius reaches into neighbouring lots, so visit the
        # fragments nearest-first: the closest label then supplies the lot
        # number and area, not whichever one OCR happened to list first.
        nearby.sort(key=lambda item: item[0])

        for _dist, text in nearby:
            lot_info = extract_lot_info(text)

            if lot_info['lot_numbers'] and lot_data['lot_number'] is None:
                lot_data['lot_number'] = lot_info['lot_numbers'][0]

            if lot_info['areas'] and lot_data['area'] is None:
                lot_data['area'] = lot_info['areas'][0]

            lot_data['dimensions'].extend(lot_info['dimensions'])

        if lot_data['lot_number']:  # Only add lots with identified numbers
            lots.append(lot_data)

    return lots
 
 
 
 
 
 
 
123
 
124
_LOT_COLUMNS = ['Lot #', 'Frontage (m)', 'Depth (m)', 'Area (m²)', 'Type']


def _draw_lot_annotations(image, lots, ocr_results):
    """Return a copy of *image* with lot boxes, labels and OCR outlines drawn on it."""
    annotated_img = image.copy()
    draw = ImageDraw.Draw(annotated_img)

    try:
        font = ImageFont.truetype("/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf", 20)
    except OSError:
        # Font file not available on this host; use Pillow's built-in font.
        font = ImageFont.load_default()

    for lot in lots:
        x, y, w, h = lot['bbox']
        draw.rectangle([x, y, x + w, y + h], outline='green', width=3)

        label_bottom = y
        if lot['lot_number']:
            # Clamp to the image so labels of lots at the top edge stay visible.
            label_top = max(y - 25, 0)
            label_bottom = label_top + 25
            draw.rectangle([x, label_top, x + 80, label_bottom], fill='red')
            draw.text((x + 5, label_top + 5), f"Lot {lot['lot_number']}", fill='white', font=font)

        if lot['area']:
            # Sits just inside the box top, or below the label when it was clamped.
            draw.text((x + 5, max(y, label_bottom) + 5), f"{lot['area']}m²", fill='blue', font=font)

    # Outline every OCR detection that passed the confidence filter (debug aid).
    for bbox, _text, _prob in ocr_results:
        points = [(int(p[0]), int(p[1])) for p in bbox]
        draw.polygon(points, outline='yellow', width=1)

    return annotated_img


def _build_lot_rows(lots):
    """Turn associated lot dicts into table rows keyed by the display column names."""
    rows = []
    for lot in lots:
        # Heuristic: the largest dimension is reported as frontage, the next as depth.
        dims = sorted(lot['dimensions'], reverse=True)
        frontage = dims[0] if len(dims) > 0 else None
        depth = dims[1] if len(dims) > 1 else None

        area = lot['area']
        if area and area > 200:
            lot_type = 'Corner Lot'
        elif area and area < 120:
            lot_type = 'Small Lot'
        else:
            lot_type = 'Standard Lot'

        rows.append({
            'Lot #': lot['lot_number'] or 'Unknown',
            'Frontage (m)': f"{frontage:.1f}" if frontage else 'N/A',
            'Depth (m)': f"{depth:.1f}" if depth else 'N/A',
            'Area (m²)': area or 'N/A',
            'Type': lot_type,
        })
    return rows


def process_subdivision_plan(image, scale=1000, confidence_threshold=0.7):
    """Run OCR and boundary detection on a plan image and tabulate the lots.

    Args:
        image: PIL image, or an array accepted by ``Image.fromarray``.
        scale: Plan scale (1:X). Accepted for interface compatibility; not
            used by the current pipeline.
        confidence_threshold: OCR detections below this probability are dropped.

    Returns:
        Tuple ``(df, annotated_image, stats, message)``. ``message`` is
        ``None`` on success; otherwise it is a status/error string and ``df``
        is an empty table with the standard columns. No placeholder lots are
        fabricated when nothing is detected.
    """
    try:
        # Ensure image is a PIL Image
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)

        # Normalise to RGB so colour annotations and the RGB->gray conversion
        # work for RGBA, palette and greyscale uploads as well.
        if image.mode != 'RGB':
            image = image.convert('RGB')

        img_array = np.array(image)

        # Run OCR
        print("Running OCR...")
        ocr_results = reader.readtext(img_array, detail=True)
        print(f"Found {len(ocr_results)} text elements")

        # Filter results by confidence
        ocr_results = [r for r in ocr_results if r[2] >= confidence_threshold]

        # Find lot boundaries
        lot_contours = find_lot_boundaries(image)
        print(f"Found {len(lot_contours)} potential lot boundaries")

        # Associate text with lots
        lots = associate_text_with_lots(ocr_results, lot_contours, img_array.shape)
        print(f"Identified {len(lots)} lots with numbers")

        annotated_img = _draw_lot_annotations(image, lots, ocr_results)
        rows = _build_lot_rows(lots)

        if not rows:
            # Report the empty result honestly instead of returning made-up lots.
            print("No lots detected")
            empty_df = pd.DataFrame(columns=_LOT_COLUMNS)
            message = ("No lots detected. Try lowering the OCR confidence "
                       "threshold or uploading a clearer plan.")
            return empty_df, annotated_img, calculate_statistics(empty_df), message

        df = pd.DataFrame(rows)

        # Sort by numeric lot value so that e.g. "692" precedes "1000";
        # non-numeric labels sort last.
        df['Lot #'] = df['Lot #'].astype(str)
        df = df.sort_values('Lot #', key=lambda col: pd.to_numeric(col, errors='coerce'))

        # Calculate statistics
        stats = calculate_statistics(df)

        return df, annotated_img, stats, None

    except Exception as e:
        # Top-level boundary for the UI: report the failure, do not crash.
        error_msg = f"Error processing image: {str(e)}"
        print(error_msg)
        empty_df = pd.DataFrame(columns=_LOT_COLUMNS)
        return empty_df, image, "No statistics available", error_msg
235
+
236
def calculate_statistics(df):
    """Build a Markdown summary of the extracted lot table.

    Args:
        df: DataFrame with at least the ``'Area (m²)'`` and ``'Type'`` columns.
            Area cells may be numbers or the placeholder string ``'N/A'``.

    Returns:
        ``"No data to analyze"`` for an empty frame, otherwise a Markdown
        string with the lot count, area totals (when any area is usable) and
        a per-type count.
    """
    if df.empty:
        return "No data to analyze"

    parts = ["**Summary Statistics**\n\n", f"Total Lots: {len(df)}\n"]

    # Collect usable areas; placeholders and unparseable cells are skipped.
    areas = []
    for area in df['Area (m²)']:
        if area == 'N/A':
            continue
        try:
            areas.append(int(area))
        except (TypeError, ValueError, OverflowError):
            continue

    if areas:
        parts.append(f"Total Area: {sum(areas):,} m²\n")
        parts.append(f"Average Lot Size: {np.mean(areas):.0f} m²\n")
        parts.append(f"Smallest Lot: {min(areas)} m²\n")
        parts.append(f"Largest Lot: {max(areas)} m²\n")

    # Count lot types (most frequent first, as returned by value_counts)
    parts.append("\n**Lot Types:**\n")
    for lot_type, count in df['Type'].value_counts().items():
        parts.append(f"- {lot_type}: {count}\n")

    return "".join(parts)
266
+
267
def export_to_csv(df):
    """Write the lot table to a CSV file and return its path.

    Args:
        df: DataFrame to export, or ``None``.

    Returns:
        Path to a file named ``subdivision_lots.csv`` inside a freshly created
        temporary directory, or ``None`` when *df* is ``None`` or empty.
    """
    import tempfile

    if df is None or df.empty:
        return None

    # A per-call directory keeps concurrent exports from overwriting each
    # other while preserving the download file name.
    export_dir = tempfile.mkdtemp(prefix="subdivision_lots_")
    csv_path = os.path.join(export_dir, "subdivision_lots.csv")

    # Explicit UTF-8: the "Area (m²)" header is not encodable in every
    # platform default encoding.
    df.to_csv(csv_path, index=False, encoding="utf-8")

    return csv_path
281
 
282
def process_wrapper(image, scale, confidence):
    """Gradio callback: run the analysis and surface any message in the status box."""
    if image is None:
        return None, None, None, gr.update(visible=True, value="Please upload an image first")

    df, annotated, stats, error = process_subdivision_plan(image, scale, confidence)

    # The status box is only shown when there is something to report.
    status = gr.update(visible=True, value=error) if error else gr.update(visible=False)
    return df, annotated, stats, status


def export_wrapper(df):
    """Gradio callback: write the table to CSV and reveal the download link."""
    if df is None or df.empty:
        return gr.update(visible=False)

    return gr.update(visible=True, value=export_to_csv(df))


# Build the Gradio interface: inputs on the left, annotated result on the
# right, then the table, statistics and CSV export below.
with gr.Blocks(title="Subdivision Plan Analyzer", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # 📐 Subdivision Plan Analyzer

        Extract lot information from subdivision plans using AI-powered OCR and image processing.

        ### How to use:
        1. Upload a subdivision plan image (PNG/JPG)
        2. Adjust scale and confidence threshold if needed
        3. Click "Extract Lots" to process
        4. Review the results and export to CSV

        **Note:** First run may take longer as OCR models download.
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            image_input = gr.Image(label="Upload Subdivision Plan", type="pil", height=400)

            with gr.Row():
                scale_input = gr.Number(
                    label="Scale (1:X)",
                    value=1000,
                    minimum=100,
                    maximum=10000,
                    step=100,
                )
                confidence_slider = gr.Slider(
                    label="OCR Confidence Threshold",
                    value=0.7,
                    minimum=0.5,
                    maximum=0.95,
                    step=0.05,
                )

            process_btn = gr.Button("🔍 Extract Lots", variant="primary", size="lg")

        with gr.Column(scale=1):
            output_image = gr.Image(label="Detected Lots", height=400)
            error_output = gr.Textbox(label="Status", visible=False, max_lines=3)

    with gr.Row():
        lot_data = gr.DataFrame(
            label="Extracted Lot Data",
            headers=["Lot #", "Frontage (m)", "Depth (m)", "Area (m²)", "Type"],
            interactive=False,
            wrap=True,
        )

    with gr.Row():
        stats_output = gr.Markdown(label="Summary Statistics")

    with gr.Row():
        export_btn = gr.Button("📥 Export to CSV", variant="secondary")
        csv_output = gr.File(label="Download CSV", visible=False)

    # Wire the buttons to their callbacks.
    process_btn.click(
        fn=process_wrapper,
        inputs=[image_input, scale_input, confidence_slider],
        outputs=[lot_data, output_image, stats_output, error_output],
    )

    export_btn.click(
        fn=export_wrapper,
        inputs=[lot_data],
        outputs=[csv_output],
    )

# Start the server only when run as a script.
if __name__ == "__main__":
    demo.launch()