Umer2762 committed on
Commit
5626a1a
·
verified ·
1 Parent(s): b46785e

Upload folder using huggingface_hub

Browse files
DataModels/AnnotatedData.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict, Optional
2
+ from .ImageMetadata import ImageMetadata
3
+
4
class AnnotatedData:
    """Top-level wrapper around a VIA (VGG Image Annotator) export dict.

    Exposes the ordered image id list and a mapping from image id to
    parsed ImageMetadata.
    """

    def __init__(self, data: Dict):
        """Parse a raw VIA export.

        Args:
            data: VIA JSON dict. Relevant keys are "_via_image_id_list"
                and "_via_img_metadata"; both are treated as optional.
        """
        self.image_ids = data.get("_via_image_id_list", [])
        # Use .get() so an export missing "_via_img_metadata" yields an
        # empty mapping instead of raising KeyError — the original indexed
        # data["_via_img_metadata"] directly, inconsistently with the
        # .get() used for the id list above.
        img_metadata = data.get("_via_img_metadata", {})
        self.metadata = {}
        for img_id in self.image_ids:
            meta = img_metadata.get(img_id)
            if meta is None:
                # Id listed but no metadata entry: skip, matching the
                # original's "if img_id in ..." filter.
                continue
            self.metadata[img_id] = ImageMetadata(
                meta["filename"],
                meta["size"],
                meta.get("regions", []),
            )

    def __repr__(self):
        return f"AnnotatedData(Images={list(self.metadata.keys())})"
DataModels/ImageMetadata.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict
2
+ from .Region import Region
3
+
4
class ImageMetadata:
    """Metadata for one annotated image: file name, byte size, and the
    list of annotated regions (wrapped as Region objects)."""

    def __init__(self, filename: str, size: int, regions: List[Dict]):
        self.filename = filename
        self.size = size
        # Wrap each raw region dict; missing attribute dicts default to {}.
        self.regions = []
        for raw_region in regions:
            shape = raw_region.get("shape_attributes", {})
            attrs = raw_region.get("region_attributes", {})
            self.regions.append(Region(shape, attrs))

    def to_dict(self):
        """Serialize back to a plain dict (regions converted recursively)."""
        serialized_regions = [region.to_dict() for region in self.regions]
        return {
            "filename": self.filename,
            "size": self.size,
            "regions": serialized_regions,
        }

    def __repr__(self):
        return f"ImageMetadata(Filename={self.filename}, Size={self.size}, Regions={self.regions})"
DataModels/Region.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict
2
+ from .ShapeAttributes import ShapeAttributes
3
+ from .RegionAttributes import RegionAttributes
4
+
5
class Region:
    """One annotated region: a bounding box plus its label attributes."""

    def __init__(self, shape_attributes: Dict, region_attributes: Dict):
        sa = shape_attributes
        self.shape_attributes = ShapeAttributes(
            sa.get("x"),
            sa.get("y"),
            sa.get("width"),
            sa.get("height"),
        )
        ra = region_attributes
        # NOTE: "Dignostic" is the (misspelled) key actually present in the
        # annotation files, so it must be kept verbatim.
        self.region_attributes = RegionAttributes(
            ra.get("Language", "English"),
            ra.get("Dosage", ""),
            ra.get("Dignostic", ""),
            ra.get("Symptoms", ""),
            ra.get("Medicine Name", ""),
            ra.get("Text", ""),
            ra.get("Personal Information", "N/A"),
            ra.get("Numeric Data", "N/A"),
        )

    def to_dict(self):
        """Serialize to nested plain dicts."""
        return {
            "shape_attributes": self.shape_attributes.to_dict(),
            "region_attributes": self.region_attributes.to_dict(),
        }

    def __repr__(self):
        return f"Region({self.shape_attributes}, {self.region_attributes})"
DataModels/RegionAttributes.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class RegionAttributes:
    """Label attributes attached to one region of a prescription image."""

    def __init__(self, language: str, dosage: str, dignostic: str, symptoms: str, medicine_name: str, text: str, personal_info: str, numeric_data: str):
        self.language = language
        self.dosage = dosage
        # "dignostic" spelling intentionally mirrors the annotation key.
        self.dignostic = dignostic
        self.symptoms = symptoms
        self.medicine_name = medicine_name
        self.text = text
        self.personal_info = personal_info
        self.numeric_data = numeric_data

    def to_dict(self):
        """Serialize using the original annotation-tool key spellings."""
        pairs = (
            ("Language", self.language),
            ("Dosage", self.dosage),
            ("Dignostic", self.dignostic),
            ("Symptoms", self.symptoms),
            ("Medicine Name", self.medicine_name),
            ("Text", self.text),
            ("Personal Information", self.personal_info),
            ("Numeric Data", self.numeric_data),
        )
        return dict(pairs)

    def __repr__(self):
        return (
            f"RegionAttributes(Language={self.language}, Dosage={self.dosage}, "
            f"Dignostic={self.dignostic}, Symptoms={self.symptoms}, "
            f"Medicine Name={self.medicine_name}, Text={self.text}, "
            f"Personal Info={self.personal_info}, Numeric Data={self.numeric_data})"
        )
DataModels/ShapeAttributes.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class ShapeAttributes:
    """Axis-aligned bounding box of a region (top-left corner plus size)."""

    def __init__(self, x: int, y: int, width: int, height: int):
        self.x = x
        self.y = y
        self.width = width
        self.height = height

    def to_dict(self):
        """Serialize to a plain dict with the VIA shape keys."""
        return dict(x=self.x, y=self.y, width=self.width, height=self.height)

    def __repr__(self):
        return f"ShapeAttributes(x={self.x}, y={self.y}, width={self.width}, height={self.height})"
DataModels/__pycache__/AnnotatedData.cpython-312.pyc ADDED
Binary file (1.38 kB). View file
 
DataModels/__pycache__/ImageMetadata.cpython-312.pyc ADDED
Binary file (1.63 kB). View file
 
DataModels/__pycache__/Region.cpython-312.pyc ADDED
Binary file (1.97 kB). View file
 
DataModels/__pycache__/RegionAttributes.cpython-312.pyc ADDED
Binary file (2.08 kB). View file
 
DataModels/__pycache__/ShapeAttributes.cpython-312.pyc ADDED
Binary file (1.25 kB). View file
 
OutputMetadataCreator.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import csv
3
+ import json
4
+ from PIL import Image
5
+ from DataModels.AnnotatedData import AnnotatedData
6
+ from DataModels.Region import Region
7
+
8
def crop_and_save_regions(image_path: str, regions: "list[Region]", output_folder: str, base_filename: str):
    """
    Crops regions from an image and saves them to the output folder.

    Args:
        image_path: Path to the source image on disk.
        regions: Parsed Region objects (bounding box + label attributes).
        output_folder: Directory that receives the cropped JPEGs.
        base_filename: Stem used to name crops ("<stem>_<n>.jpg", 1-based).

    Returns:
        A list of (cropped_image_path, text, int_type, languageInt) tuples;
        empty when the source image file does not exist.
    """
    cropped_data = []
    if not os.path.exists(image_path):
        print(f"Skipping {image_path}: Image file not found.")
        return cropped_data

    # First non-empty attribute wins, checked in this priority order;
    # the integer code is what ends up in the CSV "type" column.
    attr_priority = (
        ("medicine_name", 0),
        ("dosage", 1),
        ("dignostic", 2),
        ("symptoms", 3),
        ("personal_info", 4),
        ("numeric_data", 5),
        ("text", 6),
    )

    # Context manager releases the image file handle even if a crop raises
    # (the original opened the image and never closed it).
    with Image.open(image_path) as img:
        for idx, region in enumerate(regions):
            try:
                box = region.shape_attributes
                # Crop the region and force RGB so saving as JPEG always works.
                cropped_img = img.crop((box.x, box.y, box.x + box.width, box.y + box.height)).convert("RGB")

                cropped_image_name = f"{base_filename}_{idx + 1}.jpg"
                cropped_image_path = os.path.join(output_folder, cropped_image_name)
                cropped_img.save(cropped_image_path)

                # 0 = English, 1 = anything else (Urdu in this dataset).
                languageInt = 0 if region.region_attributes.language == "English" else 1

                int_type = 0
                text = ""
                for attr_name, code in attr_priority:
                    value = getattr(region.region_attributes, attr_name)
                    if value:
                        int_type = code
                        text = value
                        break

                # BUG FIX: the original called text.replace(...) and discarded
                # the result (str is immutable), so newlines/quotes/commas were
                # never actually stripped from the CSV text. Assign it back.
                text = text.replace("\n", "").replace("\"", "").replace(",", "`")

                cropped_data.append((cropped_image_path, text, int_type, languageInt))
            except Exception as e:
                # Best-effort: report the bad region and keep processing.
                print(f"Error cropping region {idx + 1} from {image_path}: {e}")

    return cropped_data
74
+
75
def process_folders_to_csv_and_crop(base_folder: str, output_csv: str, cropped_images_folder: str):
    """
    Walks every 'dr*' folder under base_folder, loads its VIA JSON
    annotations, crops all annotated regions out of the images, and
    consolidates everything into one CSV file.
    """
    # Make sure the destination folder for the crops exists.
    os.makedirs(cropped_images_folder, exist_ok=True)

    # The first row is the CSV header.
    csv_data = [["Cropped Image Path", "Text", "type", "language"]]

    for folder_name in os.listdir(base_folder):
        folder_path = os.path.join(base_folder, folder_name)
        # Only process directories named dr<...>; skip everything else.
        if not (folder_name.startswith("dr") and os.path.isdir(folder_path)):
            continue

        # The annotation file is expected to share the folder's name.
        json_path = os.path.join(folder_path, f"{folder_name}.json")
        if not os.path.exists(json_path):
            print(f"Skipping {folder_path}: No JSON file found.")
            continue

        with open(json_path, "r", encoding="utf-8") as file:
            annotated_data = AnnotatedData(json.load(file))

        for image_id, metadata in annotated_data.metadata.items():
            image_path = os.path.join(folder_path, metadata.filename)
            # Crops are named "<stem>_<n>.jpg", so drop the extension here.
            base_filename = os.path.splitext(metadata.filename)[0]
            rows = crop_and_save_regions(
                image_path, metadata.regions, cropped_images_folder, base_filename
            )
            csv_data.extend(rows)

    # Write header plus all collected rows in one go.
    with open(output_csv, "w", newline="", encoding="utf-8") as file:
        csv.writer(file).writerows(csv_data)

    print(f"CSV file created: {output_csv}")
116
+
117
def clean_second_column(overall_output_csv, output_file):
    """
    Post-processes the consolidated CSV: strips commas/quotes/newlines from
    the text column, normalizes the image-path column, drops rows whose text
    is empty, then deletes the original (unclean) CSV.
    """
    with open(overall_output_csv, mode='r', newline='', encoding='utf-8') as infile, \
         open(output_file, mode='w', newline='', encoding='utf-8') as outfile:
        writer = csv.writer(outfile)
        for row in csv.reader(infile):
            if len(row) <= 1:
                # Malformed row without a text column: silently dropped,
                # same as the original.
                continue
            # Sanitize the text column and normalize the path column.
            row[1] = row[1].replace(',', '').replace('"', '').replace('\n', ' ')
            row[0] = row[0].replace("./", "").replace("\\", "/")
            if row[1].strip():
                writer.writerow(row)
    # The raw CSV is superseded by the cleaned one; remove it.
    os.remove(overall_output_csv)
131
+
132
# Usage Example
# NOTE(review): these run at import time; wrap in `if __name__ == "__main__":`
# only if this module is ever imported elsewhere — left as-is to preserve
# the current script behavior.
base_folder = "./base_data"  # Base directory containing dr folders
overall_output_csv = "./all_cropped_data.csv"  # Single output CSV file
overall_output_csv_cleaned = "./all_cropped_data_cleaned.csv"  # Single output CSV file
cropped_images_folder = "./all_cropped_images"  # Folder to save all cropped images

# Build the raw CSV + crops, then sanitize into *_cleaned.csv
# (clean_second_column also deletes the raw CSV when it finishes).
process_folders_to_csv_and_crop(base_folder, overall_output_csv, cropped_images_folder)
clean_second_column(overall_output_csv,overall_output_csv_cleaned)
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: Doctor Handwriting Text Detection
3
- emoji: 🚀
4
- colorFrom: pink
5
- colorTo: green
6
  sdk: gradio
7
  sdk_version: 5.16.0
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Doctor_Handwriting_Text_Detection
3
+ app_file: predict.py
 
 
4
  sdk: gradio
5
  sdk_version: 5.16.0
 
 
6
  ---
 
 
RemoveNotCorrectImages..py ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
def delete_images_with_patterns(directory: str, patterns: list):
    """
    Deletes image files in the given directory if their filenames contain any of the specified patterns.

    Args:
        directory (str): The path to the directory containing images.
        patterns (list): Substrings to check; each is matched with a "dr"
            prefix (e.g. "1_1" matches any filename containing "dr1_1").
    """
    if not os.path.exists(directory):
        print(f"Directory '{directory}' does not exist.")
        return

    # Pre-build the actual substrings to look for.
    needles = ["dr" + pattern for pattern in patterns]

    for filename in os.listdir(directory):
        if not any(needle in filename for needle in needles):
            continue
        file_path = os.path.join(directory, filename)
        try:
            os.remove(file_path)
            print(f"Deleted: {file_path}")
        except Exception as e:
            # Keep going; report the file that could not be removed.
            print(f"Error deleting {file_path}: {e}")
25
+
26
def clean_csv(csv_path: str, patterns: list):
    """
    Removes rows from the CSV whose first column matches any pattern
    (exactly, or followed by "_<digits>"), then drops duplicate rows and
    writes the result back in place.

    Args:
        csv_path: Path to the CSV file to clean in place.
        patterns: Filename stems, with or without the "dr" prefix
            (e.g. "1_1" and "dr1_1" both match "dr1_1" / "dr1_1_2").
    """
    if not os.path.exists(csv_path):
        print(f"CSV file '{csv_path}' does not exist.")
        return

    # Load CSV into a DataFrame
    df = pd.read_csv(csv_path)

    # Ensure the first column is treated as a string
    df.iloc[:, 0] = df.iloc[:, 0].astype(str)

    # BUG FIX: some call sites pass patterns that already carry the "dr"
    # prefix; the original unconditionally prepended "dr", producing
    # "drdr..." regexes that could never match anything. Only add the
    # prefix when it is missing.
    modified_patterns = [p if p.startswith("dr") else f"dr{p}" for p in patterns]

    # Each pattern matches exactly, or with "_<digits>" appended; combine
    # them into one alternation.
    combined_regex = '|'.join(f"^{pattern}(_\\d+)?$" for pattern in modified_patterns)

    # NOTE(review): str.match anchors at the start and our regex ends with
    # "$", so only rows whose first column is exactly the bare stem are
    # removed — full paths like "all_cropped_images/dr1_1.jpg" do NOT match.
    # Confirm this is the column format of the CSVs passed in.
    df = df[~df.iloc[:, 0].str.match(combined_regex, na=False)]

    # Remove duplicate rows (first occurrence kept).
    df.drop_duplicates(inplace=True)

    # Save cleaned data back to CSV
    df.to_csv(csv_path, index=False)
    print(f"Updated CSV saved: {csv_path}")
62
+
63
+ # List of text patterns to match in filenames
64
+ patterns_to_delete = [
65
+ "1_1",
66
+ "4_1",
67
+ "4_2",
68
+ "4_3",
69
+ "4_4",
70
+ "4_5",
71
+ "4_6",
72
+ "5_1",
73
+ "5_2",
74
+ "7_1",
75
+ "10_1",
76
+ "24_1",
77
+ "24_2",
78
+ "25_1",
79
+ "25_2",
80
+ "29_1",
81
+ "30_1",
82
+ "33_1",
83
+ "36_1",
84
+ "36_4",
85
+ "36_5",
86
+ "36_6",
87
+ "38_1",
88
+ "38_2",
89
+ "38_3",
90
+ "38_4",
91
+ "38_5",
92
+ "38_6",
93
+ "38_7",
94
+ "38_8",
95
+ "38_9",
96
+ "42_1",
97
+ "42_2",
98
+ "42_4",
99
+ "43_1",
100
+ "43_2",
101
+ "43_3",
102
+ "43_4",
103
+ "43_5",
104
+ "44_1",
105
+ "44_2",
106
+ "44_3",
107
+ "44_4",
108
+ "44_6",
109
+ "45_1",
110
+ "47_1",
111
+ "50_1",
112
+ "57_1",
113
+ "57_2",
114
+ "63_1",
115
+ "64_1",
116
+ "64_2",
117
+ "64_3",
118
+ "64_4",
119
+ "64_5",
120
+ "64_6",
121
+ "64_7",
122
+ "64_8",
123
+ "64_9",
124
+ "65_1",
125
+ "65_2",
126
+ "66_1",
127
+ "66_2",
128
+ "66_3",
129
+ "66_4",
130
+ "66_5",
131
+ "66_6",
132
+ "66_7",
133
+ "66_8",
134
+ "69_1",
135
+ "69_2",
136
+ "69_3",
137
+ "69_4",
138
+ "69_5",
139
+ "69_6",
140
+ "69_7",
141
+ "69_8",
142
+ "69_9",
143
+ "71_1",
144
+ "71_2",
145
+ "71_3",
146
+ "71_4",
147
+ "71_5",
148
+ "73_1",
149
+ "74_1",
150
+ "75_1",
151
+ "75_2",
152
+ "75_3",
153
+ "75_4",
154
+ "75_5",
155
+ "75_6",
156
+ "77_1",
157
+ "77_2",
158
+ "77_3",
159
+ "76_1",
160
+ "76_2",
161
+ "76_3",
162
+ "76_4",
163
+ "76_5",
164
+ "80_1",
165
+ "80_2",
166
+ "82_1",
167
+ "86_1",
168
+ "86_2",
169
+ "86_3",
170
+ "86_4",
171
+ "86_5",
172
+ "87_1",
173
+ "87_2",
174
+ "87_3",
175
+ "87_4",
176
+ "87_5",
177
+ "87_6",
178
+ "89_1",
179
+ "92_1",
180
+ "92_2",
181
+ "93_1",
182
+ "94_2",
183
+ "94_1",
184
+ "95_1",
185
+ "97_1",
186
+ "97_2",
187
+ "102_1",
188
+ "104_1",
189
+ "108_1",
190
+ "109_1",
191
+ "112_1",
192
+ "114_1",
193
+ "114_2",
194
+ "114_3",
195
+ "114_4",
196
+ "114_5",
197
+ "114_6",
198
+ "114_7",
199
+ "114_8",
200
+ "114_9",
201
+ "115_1",
202
+ "115_2",
203
+ "116_1",
204
+ "116_2",
205
+ "116_3",
206
+ "117_1",
207
+ "128_1",
208
+ "130_1",
209
+ "132_1",
210
+ "132_2",
211
+ "132_3",
212
+ "137_1",
213
+ "137_2",
214
+ "137_3",
215
+ "137_4",
216
+ "137_5",
217
+ "137_6",
218
+ "137_7",
219
+ "137_8",
220
+ "137_9",
221
+ "140_5",
222
+ "146_1",
223
+ "146_2",
224
+ "146_3",
225
+ "151_1",
226
+ "151_2",
227
+ "163_1",
228
+ "169_1",
229
+ "173_1",
230
+ "173_2",
231
+ "100_1"
232
+ ]
233
+
234
+ # Specify your target directory
235
+ target_directory = "./cropped_images" # Change this to your actual directory
236
+
237
+ # Run the deletion function
238
+ # delete_images_with_patterns(target_directory, patterns_to_delete)
239
+ patterns_to_delete = [
240
+ "dr80_2",
241
+ "dr80_3",
242
+ "dr81_1",
243
+ "dr81_1",
244
+ "dr81_2",
245
+ "dr83_1",
246
+ "dr86_1",
247
+ "dr86_2",
248
+ "dr86_3",
249
+ "dr86_4",
250
+ "dr86_5",
251
+ "dr87_1",
252
+ "dr87_2",
253
+ "dr87_3",
254
+ "dr87_4",
255
+ "dr87_5",
256
+ "dr87_6",
257
+ "dr88_1",
258
+ "dr89_1",
259
+ "dr89_2",
260
+ "dr9_1",
261
+ "dr90_1",
262
+ "dr92_1",
263
+ "dr92_1",
264
+ "dr92_2",
265
+ "dr92_3",
266
+ "dr93_1",
267
+ "dr93_2",
268
+ "dr94_1",
269
+ "dr94_2",
270
+ "dr94_3",
271
+ "dr95_1",
272
+ "dr95_2",
273
+ "dr96_1",
274
+ "dr97_1",
275
+ "dr97_2",
276
+ "dr97_3",
277
+ "dr98_1",
278
+ ]
279
+ clean_csv("all_cropped_data.csv",patterns=patterns_to_delete)
__pycache__/AnnotatedData.cpython-312.pyc ADDED
Binary file (1.36 kB). View file
 
__pycache__/ImageMetadata.cpython-312.pyc ADDED
Binary file (1.61 kB). View file
 
__pycache__/Region.cpython-312.pyc ADDED
Binary file (1.96 kB). View file
 
__pycache__/RegionAttributes.cpython-312.pyc ADDED
Binary file (2.07 kB). View file
 
__pycache__/ShapeAttributes.cpython-312.pyc ADDED
Binary file (1.24 kB). View file
 
model/text_type_lang_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8a157f2b6cfbdf954a1fd867f7b4edb9fd92b1dbe33c4dbaa16fdd1e1bc671f
3
+ size 885270680
predict.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import tensorflow as tf
4
+ from keras.api.models import load_model
5
+ from PIL import Image
6
+ import string
7
+ import pandas as pd
8
+
9
class TextTypeLangModel:
    """Wraps the trained multi-output Keras model that predicts the text,
    its type code, and its language from a cropped prescription image."""

    def __init__(self, model_path, csv_path=None):
        # Load pre-trained model from disk.
        self.model = load_model(model_path)
        # Character inventory: Latin letters/digits/space plus the Urdu
        # alphabet. Must stay identical to the training-time inventory.
        urdu_chars = (
            "آبپچڈڑڤکگہہٹژزسٹطظعغفقکگلاںمںنۓہھوؤ"
            "ےیئؤٹپجچحخدڈذرزسشصضطظعغفقکلمنوٕں"
            "ۓۓہ۔،؛؟"
        )
        self.characters = string.ascii_letters + string.digits + " " + urdu_chars

        self.num_chars = len(self.characters) + 1  # Extra slot for blank
        # Index 0 is reserved for unknown/blank characters, hence the +1.
        self.char_to_index = {}
        self.index_to_char = {}
        for i, c in enumerate(self.characters):
            self.char_to_index[c] = i + 1
            self.index_to_char[i + 1] = c

    def encode_text(self, text, max_len=10):
        """Pad/trim text to max_len and map each char to its index (0 = unknown)."""
        padded = text[:max_len].ljust(max_len)
        return [self.char_to_index.get(c, 0) for c in padded]

    def preprocess_image(self, image):
        """Normalize a PIL image to a 128x128 float RGB array in [0, 1]."""
        rgb = image.convert("RGB").resize((128, 128))
        return np.array(rgb) / 255.0

    def predict(self, image):
        """Run the model on one PIL image; returns (text, type_idx, lang_idx)."""
        batch = np.expand_dims(self.preprocess_image(image), axis=0)
        pred_text, pred_type, pred_lang = self.model.predict(batch)

        # Greedy decode: per-time-step argmax over the 10-character head.
        decoded = ''.join(
            self.index_to_char.get(np.argmax(pred_text[0][i]), '') for i in range(10)
        )

        return decoded.strip(), np.argmax(pred_type), np.argmax(pred_lang)
42
+
43
def get_type_string(int_type):
    """Map a numeric type code (0-6) to its human-readable label; any
    unrecognized code yields "Unknown"."""
    labels = [
        "Medicine",
        "Dosage",
        "Diagnostic",
        "Symptoms",
        "Personal Info",
        "Numeric Data",
        "Text",
    ]
    # dict lookup (not list indexing) keeps the original semantics for
    # negative, float, and numpy-integer keys.
    type_switch = dict(enumerate(labels))
    return type_switch.get(int_type, "Unknown")
54
+
55
def predict_text_type_lang(image):
    """Gradio handler: predict (text, type label, language label) for one
    uploaded PIL image.

    PERF FIX: the original constructed TextTypeLangModel — and therefore
    reloaded the ~880 MB .h5 weights file — on every single request. Cache
    the model on the function object so the expensive load happens once.
    """
    model = getattr(predict_text_type_lang, "_model", None)
    if model is None:
        model = TextTypeLangModel("./model/text_type_lang_model.h5")
        predict_text_type_lang._model = model

    predicted_text, predicted_type, predicted_language = model.predict(image)
    predicted_type_str = get_type_string(predicted_type)
    # Language head is binary: 0 = English, 1 = Urdu.
    predicted_language_str = "English" if predicted_language == 0 else "Urdu"

    return predicted_text, predicted_type_str, predicted_language_str
62
+
63
# Gradio interface
# Single PIL image input -> three text outputs (recognized text, type label,
# language label), served by predict_text_type_lang above.
iface = gr.Interface(
    fn=predict_text_type_lang,
    inputs=gr.Image(type="pil"),
    outputs=["text", "text", "text"],
    title="Text Type & Language Prediction",
    description="Upload an image to predict the extracted text, type, and language.",
)

# debug=True surfaces tracebacks in the browser — acceptable for a demo Space.
iface.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ arabic-reshaper==3.0.0
2
+ asttokens==3.0.0
3
+ comm==0.2.2
4
+ debugpy==1.8.12
5
+ decorator==5.1.1
6
+ executing==2.1.0
7
+ ipykernel==6.29.5
8
+ ipython==8.31.0
9
+ jedi==0.19.2
10
+ jupyter_client==8.6.3
11
+ jupyter_core==5.7.2
12
+ Mako==1.3.6
13
+ MarkupSafe==3.0.2
14
+ matplotlib-inline==0.1.7
15
+ meson==1.6.0
16
+ nest-asyncio==1.6.0
17
+ parso==0.8.4
18
+ pillow==11.0.0
19
+ prompt_toolkit==3.0.48
20
+ pure_eval==0.2.3
21
+ pybind11==2.13.6
22
+ Pygments==2.19.1
23
+ python-bidi==0.6.3
24
+ pywin32==308
25
+ pyzmq==26.2.0
26
+ setuptools==75.6.0
27
+ stack-data==0.6.3
28
+ tornado==6.4.2
29
+ traitlets==5.14.3
30
+ wcwidth==0.2.13
31
+ accelerate==1.2.1
32
+ acres==0.2.0
33
+ aiofiles==24.1.0
34
+ aiohappyeyeballs==2.4.4
35
+ aiohttp==3.11.11
36
+ aiosignal==1.3.2
37
+ annotated-types==0.7.0
38
+ antlr4-python3-runtime==4.9.3
39
+ anyio==4.8.0
40
+ attrs==25.1.0
41
+ certifi==2024.12.14
42
+ charset-normalizer==3.4.1
43
+ ci-info==0.3.0
44
+ click==8.1.8
45
+ colorama==0.4.6
46
+ configobj==5.0.9
47
+ configparser==7.1.0
48
+ contourpy==1.3.1
49
+ cycler==0.12.1
50
+ datasets==3.2.0
51
+ diffusers==0.32.1
52
+ dill==0.3.8
53
+ docker-pycreds==0.4.0
54
+ efficientnet_pytorch==0.7.1
55
+ einops==0.8.0
56
+ etelemetry==0.3.1
57
+ evaluate==0.4.3
58
+ filelock==3.16.1
59
+ fonttools==4.55.3
60
+ frontend==0.0.3
61
+ frozenlist==1.5.0
62
+ fsspec==2024.9.0
63
+ gitdb==4.0.12
64
+ GitPython==3.1.44
65
+ h11==0.14.0
66
+ httplib2==0.22.0
67
+ huggingface-hub==0.27.1
68
+ idna==3.10
69
+ imageio==2.36.1
70
+ importlib_metadata==8.5.0
71
+ isodate==0.6.1
72
+ itsdangerous==2.2.0
73
+ Jinja2==3.1.5
74
+ jiwer==3.1.0
75
+ joblib==1.4.2
76
+ kiwisolver==1.4.8
77
+ lazy_loader==0.4
78
+ looseversion==1.3.0
79
+ lxml==5.3.0
80
+ matplotlib==3.10.0
81
+ mpmath==1.3.0
82
+ multidict==6.1.0
83
+ multiprocess==0.70.16
84
+ munch==4.0.0
85
+ natsort==8.4.0
86
+ networkx==3.4.2
87
+ nibabel==5.3.2
88
+ nipype==1.9.2
89
+ numpy==2.2.1
90
+ omegaconf==2.3.0
91
+ opencv-python==4.10.0.84
92
+ packaging==24.2
93
+ pandas==2.2.3
94
+ pathlib==1.0.1
95
+ pip==24.3.1
96
+ platformdirs==4.3.6
97
+ plotly==5.24.1
98
+ pretrainedmodels==0.7.4
99
+ propcache==0.2.1
100
+ protobuf==5.29.3
101
+ prov==2.0.1
102
+ psutil==6.1.1
103
+ puremagic==1.28
104
+ pyarrow==19.0.0
105
+ pydantic==2.10.5
106
+ pydantic_core==2.27.2
107
+ pydot==3.0.4
108
+ PyMuPDF==1.25.2
109
+ PyMySQL==1.1.1
110
+ pyparsing==3.2.1
111
+ python-dateutil==2.9.0.post0
112
+ pytz==2024.2
113
+ pyxnat==1.6.2
114
+ PyYAML==6.0.2
115
+ RapidFuzz==3.12.1
116
+ rdflib==6.3.2
117
+ regex==2024.11.6
118
+ requests==2.32.3
119
+ safetensors==0.5.2
120
+ scikit-image==0.25.0
121
+ scikit-learn==1.6.1
122
+ scipy==1.15.1
123
+ segmentation_models_pytorch==0.4.0
124
+ sentence-transformers==3.1.1
125
+ sentry-sdk==2.19.2
126
+ setproctitle==1.3.4
127
+ simplejson==3.19.3
128
+ six==1.17.0
129
+ smmap==5.0.2
130
+ sniffio==1.3.1
131
+ starlette==0.45.3
132
+ sympy==1.13.1
133
+ tenacity==9.0.0
134
+ threadpoolctl==3.5.0
135
+ tifffile==2025.1.10
136
+ timm==1.0.13
137
+ tokenizers==0.20.3
138
+ torch==2.5.1
139
+ torchvision==0.20.1
140
+ tqdm==4.67.1
141
+ traits==7.0.2
142
+ transformers==4.45.2
143
+ typing_extensions==4.12.2
144
+ tzdata==2025.1
145
+ unet==0.8.1
146
+ urllib3==2.3.0
147
+ uvicorn==0.34.0
148
+ wandb==0.19.2
149
+ xxhash==3.5.0
150
+ yarl==1.18.3
151
+ zipp==3.21.0
train_from_scratch.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import tensorflow as tf
4
+ from keras import Model, Input
5
+ from keras.api.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Reshape, TimeDistributed
6
+ from sklearn.model_selection import train_test_split
7
+ from keras.api.utils import to_categorical
8
+ from PIL import Image
9
+ import string
10
+
11
class TextTypeLangModel:
    """Trains a small CNN with three output heads on the cropped-regions CSV:
    a 10-character text head, a type head, and a language head.

    Expects the CSV to have columns "Cropped Image Path", "Text", "type"
    (0-6) and "language" (0/1), as produced by OutputMetadataCreator.py.
    """

    def __init__(self, csv_path):
        # Training table loaded eagerly; image files are read in prepare_data().
        self.df = pd.read_csv(csv_path)
        # Character inventory (Latin + digits + space + Urdu). Must match the
        # inventory used at inference time in predict.py.
        self.characters = string.ascii_letters + string.digits + " " + \
                          "آبپچڈڑڤکگہہٹژزسٹطظعغفقکگلاںمںنۓہھوؤ" + \
                          "ےیئؤٹپجچحخدڈذرزسشصضطظعغفقکلمنوٕں" + \
                          "ۓۓہ۔،؛؟"

        self.num_chars = len(self.characters) + 1  # Extra for blank
        # Index 0 is reserved for unknown/blank characters.
        self.char_to_index = {c: i+1 for i, c in enumerate(self.characters)}
        self.index_to_char = {i+1: c for i, c in enumerate(self.characters)}
        self.model = self.build_model()

    def encode_text(self, text, max_len=10):
        """Pad/trim text to max_len and map chars to indices (0 = unknown)."""
        text = text[:max_len].ljust(max_len) # Pad or trim text
        return [self.char_to_index.get(c, 0) for c in text] # Convert to indices

    def preprocess_image(self, image_path):
        """Load an image file as a normalized 128x128 RGB float array."""
        image = Image.open(image_path).convert("RGB")
        image = image.resize((128, 128))
        image = np.array(image) / 255.0 # Normalize
        return image

    def prepare_data(self):
        """Build (images, text, type, language) arrays and split 80/20.

        Returns the 8-tuple produced by train_test_split in the order
        (X_train, X_test, y_text_train, y_text_test,
         y_type_train, y_type_test, y_lang_train, y_lang_test).
        """
        # NOTE(review): all images are materialized in memory at once —
        # fine for a small dataset, would need a generator for a large one.
        X_images = np.array([self.preprocess_image(img) for img in self.df['Cropped Image Path']])
        y_text = np.array([self.encode_text(txt) for txt in self.df['Text']])
        y_type = to_categorical(self.df['type'].values, num_classes=7)
        y_lang = to_categorical(self.df['language'].values, num_classes=2)
        return train_test_split(X_images, to_categorical(y_text, self.num_chars), y_type, y_lang, test_size=0.2, random_state=42)

    def build_model(self):
        """Construct and compile the 3-head CNN (text / type / language)."""
        input_layer = Input(shape=(128, 128, 3))
        x = Conv2D(32, (3, 3), activation='relu')(input_layer)
        x = MaxPooling2D()(x)
        x = Conv2D(64, (3, 3), activation='relu')(x)
        x = MaxPooling2D()(x)
        x = Flatten()(x)

        # Increase the number of units in Dense to match the required total number of features
        x = Dense(1280, activation='relu')(x) # Set to 1280 units (10 * 128) to match reshape size

        # Reshape the output to have 10 time steps and each time step has 128 features
        x = Reshape((10, 128))(x) # Shape (batch_size, 10, 128)

        # TimeDistributed applied to Dense layer for text output (10-character sequence)
        time_distributed_text_output = TimeDistributed(Dense(self.num_chars, activation='softmax'))(x)
        text_output = Reshape((10, self.num_chars), name='text_output')(time_distributed_text_output) # 10-character output

        # Type and language prediction.
        # NOTE(review): these Dense layers are applied to the (10, 128)
        # tensor, so they emit a prediction per time step — which is why
        # train() repeats the one-hot labels 10x along the time axis below.
        output_type = Dense(7, activation='softmax', name='type_output')(x)
        output_lang = Dense(2, activation='softmax', name='lang_output')(x)

        model = Model(inputs=input_layer, outputs=[text_output, output_type, output_lang])
        model.compile(optimizer='adam',
                      loss=['categorical_crossentropy', 'categorical_crossentropy', 'categorical_crossentropy'],
                      metrics={'text_output': 'accuracy', 'type_output': 'accuracy', 'lang_output': 'accuracy'})
        return model

    def train(self, epochs=10, batch_size=32):
        """Fit the model on the CSV data and save weights to an .h5 file."""
        X_train, X_test, y_train_text, y_test_text, y_train_type, y_test_type, y_train_lang, y_test_lang = self.prepare_data()

        # Reshape the target labels to match the output of the model (batch_size, 10, num_classes)
        y_train_type = np.expand_dims(y_train_type, axis=1)
        y_train_type = np.repeat(y_train_type, 10, axis=1) # Repeat along the time axis

        y_test_type = np.expand_dims(y_test_type, axis=1)
        y_test_type = np.repeat(y_test_type, 10, axis=1)

        y_train_lang = np.expand_dims(y_train_lang, axis=1)
        y_train_lang = np.repeat(y_train_lang, 10, axis=1) # Repeat along the time axis

        y_test_lang = np.expand_dims(y_test_lang, axis=1)
        y_test_lang = np.repeat(y_test_lang, 10, axis=1)

        # Now, fit the model
        self.model.fit(X_train, [y_train_text, y_train_type, y_train_lang],
                       validation_data=(X_test, [y_test_text, y_test_type, y_test_lang]),
                       epochs=epochs, batch_size=batch_size)
        # Saved relative to the CWD; predict.py loads it from ./model/.
        self.model.save("text_type_lang_model.h5")

    def predict(self, image_path):
        """Predict (decoded_text, type_idx, lang_idx) for one image file."""
        image = self.preprocess_image(image_path)
        image = np.expand_dims(image, axis=0)
        pred_text, pred_type, pred_lang = self.model.predict(image)
        # Greedy decode: per-time-step argmax over the 10-character head.
        pred_text = ''.join(self.index_to_char.get(np.argmax(pred_text[0][i]), '') for i in range(10))
        return pred_text.strip(), np.argmax(pred_type), np.argmax(pred_lang)
97
+
98
+
99
# Initialize and train the model
# NOTE(review): runs at import time — training starts as soon as this
# module is imported or executed.
model = TextTypeLangModel("all_cropped_data_cleaned.csv")
model.train()

# Predict using the first image from the dataset (sanity check only —
# this image was part of the data fed to train/test split).
first_image_path = model.df['Cropped Image Path'].iloc[0]
predicted_text, predicted_type, predicted_language = model.predict(first_image_path)

print("Predicted Text:", predicted_text)
print("Predicted Type:", predicted_type)
# Language code 0 = English, 1 = Urdu (same convention as predict.py).
print("Predicted Language:", "English" if predicted_language == 0 else "Urdu")