Upload folder using huggingface_hub
Browse files- DataModels/AnnotatedData.py +16 -0
- DataModels/ImageMetadata.py +18 -0
- DataModels/Region.py +31 -0
- DataModels/RegionAttributes.py +25 -0
- DataModels/ShapeAttributes.py +17 -0
- DataModels/__pycache__/AnnotatedData.cpython-312.pyc +0 -0
- DataModels/__pycache__/ImageMetadata.cpython-312.pyc +0 -0
- DataModels/__pycache__/Region.cpython-312.pyc +0 -0
- DataModels/__pycache__/RegionAttributes.cpython-312.pyc +0 -0
- DataModels/__pycache__/ShapeAttributes.cpython-312.pyc +0 -0
- OutputMetadataCreator.py +139 -0
- README.md +2 -8
- RemoveNotCorrectImages..py +279 -0
- __pycache__/AnnotatedData.cpython-312.pyc +0 -0
- __pycache__/ImageMetadata.cpython-312.pyc +0 -0
- __pycache__/Region.cpython-312.pyc +0 -0
- __pycache__/RegionAttributes.cpython-312.pyc +0 -0
- __pycache__/ShapeAttributes.cpython-312.pyc +0 -0
- model/text_type_lang_model.h5 +3 -0
- predict.py +72 -0
- requirements.txt +151 -0
- train_from_scratch.py +109 -0
DataModels/AnnotatedData.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List, Dict, Optional
|
| 2 |
+
from .ImageMetadata import ImageMetadata
|
| 3 |
+
|
| 4 |
+
class AnnotatedData:
    """Container for the images described by one annotation dictionary.

    Attributes:
        image_ids: Value of ``"_via_image_id_list"`` in the input (``[]`` when absent).
        metadata: Mapping of image id to ``ImageMetadata`` for every listed id
            that also has an entry under ``"_via_img_metadata"``.
    """

    def __init__(self, data: Dict):
        """Parse the raw annotation dictionary.

        Args:
            data: Decoded annotation JSON. ``"_via_image_id_list"`` is optional;
                ``"_via_img_metadata"`` is only read when at least one id is listed.

        Raises:
            KeyError: If ids are listed but ``"_via_img_metadata"`` is missing, or
                an entry lacks ``"filename"`` or ``"size"``.
        """
        self.image_ids = data.get("_via_image_id_list", [])

        parsed = {}
        for img_id in self.image_ids:
            # Looked up per id (not hoisted) so an input without ids never
            # requires the "_via_img_metadata" key to exist.
            all_entries = data["_via_img_metadata"]
            if img_id not in all_entries:
                continue  # listed id without metadata: silently skipped
            entry = all_entries[img_id]
            parsed[img_id] = ImageMetadata(
                entry["filename"],
                entry["size"],
                entry.get("regions", []),
            )
        self.metadata = parsed

    def __repr__(self):
        """Return a short summary listing the ids that have metadata."""
        known_ids = list(self.metadata)
        return f"AnnotatedData(Images={known_ids})"
|
DataModels/ImageMetadata.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List, Dict
|
| 2 |
+
from .Region import Region
|
| 3 |
+
|
| 4 |
+
class ImageMetadata:
    """Filename, size and parsed regions of a single annotated image."""

    def __init__(self, filename: str, size: int, regions: List[Dict]):
        """Store the image fields and wrap each raw region in a ``Region``.

        Args:
            filename: Image file name as given in the annotation entry.
            size: The entry's ``size`` value, stored unchanged.
            regions: Raw region dictionaries; missing ``"shape_attributes"`` or
                ``"region_attributes"`` keys are treated as empty dictionaries.
        """
        self.filename = filename
        self.size = size

        parsed_regions = []
        for raw_region in regions:
            shape = raw_region.get("shape_attributes", {})
            attributes = raw_region.get("region_attributes", {})
            parsed_regions.append(Region(shape, attributes))
        self.regions = parsed_regions

    def to_dict(self):
        """Return a plain-dictionary form with every region converted via ``to_dict``."""
        serialized_regions = []
        for region in self.regions:
            serialized_regions.append(region.to_dict())
        return {
            "filename": self.filename,
            "size": self.size,
            "regions": serialized_regions,
        }

    def __repr__(self):
        """Return a debugging string with filename, size and regions."""
        return f"ImageMetadata(Filename={self.filename}, Size={self.size}, Regions={self.regions})"
|
DataModels/Region.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Dict
|
| 2 |
+
from .ShapeAttributes import ShapeAttributes
|
| 3 |
+
from .RegionAttributes import RegionAttributes
|
| 4 |
+
|
| 5 |
+
class Region:
    """One annotated region: its bounding shape plus its labelled attributes."""

    def __init__(self, shape_attributes: Dict, region_attributes: Dict):
        """Build typed attribute objects from the two raw dictionaries.

        Args:
            shape_attributes: Raw shape dictionary; ``x``, ``y``, ``width`` and
                ``height`` are read and default to ``None`` when absent.
            region_attributes: Raw label dictionary; each known key falls back
                to the default listed below when absent.
        """
        x, y, width, height = (
            shape_attributes.get(key) for key in ("x", "y", "width", "height")
        )
        self.shape_attributes = ShapeAttributes(x, y, width, height)

        # (annotation key, default) in the positional order RegionAttributes expects.
        # "Dignostic" is the spelling used by the annotation keys themselves.
        keyed_defaults = (
            ("Language", "English"),
            ("Dosage", ""),
            ("Dignostic", ""),
            ("Symptoms", ""),
            ("Medicine Name", ""),
            ("Text", ""),
            ("Personal Information", "N/A"),
            ("Numeric Data", "N/A"),
        )
        values = [region_attributes.get(key, default) for key, default in keyed_defaults]
        self.region_attributes = RegionAttributes(*values)

    def to_dict(self):
        """Return both attribute groups as nested plain dictionaries."""
        shape_dict = self.shape_attributes.to_dict()
        attribute_dict = self.region_attributes.to_dict()
        return {
            "shape_attributes": shape_dict,
            "region_attributes": attribute_dict,
        }

    def __repr__(self):
        """Return a debugging string built from both attribute objects."""
        return f"Region({self.shape_attributes}, {self.region_attributes})"
|
DataModels/RegionAttributes.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class RegionAttributes:
    """Labelled attributes attached to one annotated region."""

    def __init__(self, language: str, dosage: str, dignostic: str, symptoms: str, medicine_name: str, text: str, personal_info: str, numeric_data: str):
        """Store each label value unchanged on the instance."""
        self.language = language
        self.dosage = dosage
        self.dignostic = dignostic
        self.symptoms = symptoms
        self.medicine_name = medicine_name
        self.text = text
        self.personal_info = personal_info
        self.numeric_data = numeric_data

    def to_dict(self):
        """Return the attributes keyed by their annotation-file names."""
        keys = (
            "Language",
            "Dosage",
            "Dignostic",
            "Symptoms",
            "Medicine Name",
            "Text",
            "Personal Information",
            "Numeric Data",
        )
        values = (
            self.language,
            self.dosage,
            self.dignostic,
            self.symptoms,
            self.medicine_name,
            self.text,
            self.personal_info,
            self.numeric_data,
        )
        return dict(zip(keys, values))

    def __repr__(self):
        """Return a debugging string listing every attribute."""
        return (
            f"RegionAttributes(Language={self.language}, Dosage={self.dosage}, "
            f"Dignostic={self.dignostic}, Symptoms={self.symptoms}, "
            f"Medicine Name={self.medicine_name}, Text={self.text}, "
            f"Personal Info={self.personal_info}, Numeric Data={self.numeric_data})"
        )
|
DataModels/ShapeAttributes.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class ShapeAttributes:
    """Rectangle describing where a region sits inside its image."""

    def __init__(self, x: int, y: int, width: int, height: int):
        """Store the rectangle's origin (``x``, ``y``) and its ``width``/``height``."""
        self.x, self.y = x, y
        self.width, self.height = width, height

    def to_dict(self):
        """Return the rectangle as a plain dictionary."""
        return dict(x=self.x, y=self.y, width=self.width, height=self.height)

    def __repr__(self):
        """Return a debugging string with all four values."""
        return (
            f"ShapeAttributes(x={self.x}, y={self.y}, "
            f"width={self.width}, height={self.height})"
        )
|
DataModels/__pycache__/AnnotatedData.cpython-312.pyc
ADDED
|
Binary file (1.38 kB). View file
|
|
|
DataModels/__pycache__/ImageMetadata.cpython-312.pyc
ADDED
|
Binary file (1.63 kB). View file
|
|
|
DataModels/__pycache__/Region.cpython-312.pyc
ADDED
|
Binary file (1.97 kB). View file
|
|
|
DataModels/__pycache__/RegionAttributes.cpython-312.pyc
ADDED
|
Binary file (2.08 kB). View file
|
|
|
DataModels/__pycache__/ShapeAttributes.cpython-312.pyc
ADDED
|
Binary file (1.25 kB). View file
|
|
|
OutputMetadataCreator.py
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import csv
|
| 3 |
+
import json
|
| 4 |
+
from PIL import Image
|
| 5 |
+
from DataModels.AnnotatedData import AnnotatedData
|
| 6 |
+
from DataModels.Region import Region
|
| 7 |
+
|
| 8 |
+
def crop_and_save_regions(image_path: str, regions: "list[Region]", output_folder: str, base_filename: str):
    """Crop every region out of an image and save each crop as a JPEG.

    Args:
        image_path: Path of the source image. A missing file is reported and skipped.
        regions: Regions to crop; each supplies ``shape_attributes`` (x, y, width,
            height) and ``region_attributes`` (language plus the text fields).
        output_folder: Existing directory that receives the crops.
        base_filename: Stem for crop names; crops are saved as
            ``<base_filename>_<1-based index>.jpg``.

    Returns:
        A list of ``(cropped_image_path, text, int_type, languageInt)`` tuples, one
        per region that was cropped successfully. ``languageInt`` is 0 for
        "English" and 1 otherwise. ``int_type`` is the code of the first non-empty
        field in the priority table below (0 with empty text when none is set).
    """
    cropped_data = []
    if not os.path.exists(image_path):
        print(f"Skipping {image_path}: Image file not found.")
        return cropped_data

    # (attribute name, type code), checked in this order; the first truthy value wins.
    # NOTE(review): Region defaults "Personal Information" and "Numeric Data" to
    # "N/A" when the key is absent, so the "text" entry is only reached when the
    # annotation provides those keys with empty values -- confirm this is intended.
    type_priority = (
        ("medicine_name", 0),
        ("dosage", 1),
        ("dignostic", 2),
        ("symptoms", 3),
        ("personal_info", 4),
        ("numeric_data", 5),
        ("text", 6),
    )

    # The context manager releases the file handle once all crops are written.
    with Image.open(image_path) as img:
        for idx, region in enumerate(regions):
            try:
                shape = region.shape_attributes
                box = (shape.x, shape.y, shape.x + shape.width, shape.y + shape.height)
                # JPEG cannot store alpha or palette modes, hence the RGB conversion.
                cropped_img = img.crop(box).convert("RGB")

                cropped_image_name = f"{base_filename}_{idx + 1}.jpg"
                cropped_image_path = os.path.join(output_folder, cropped_image_name)
                cropped_img.save(cropped_image_path)

                attributes = region.region_attributes
                languageInt = 0 if attributes.language == "English" else 1

                int_type, text = 0, ""
                for attribute_name, type_code in type_priority:
                    value = getattr(attributes, attribute_name)
                    if value:
                        int_type, text = type_code, value
                        break

                # str.replace returns a new string; the result must be kept or
                # the clean-up of newlines, quotes and commas is silently lost.
                text = text.replace("\n", "").replace("\"", "").replace(",", "`")

                cropped_data.append((cropped_image_path, text, int_type, languageInt))
            except Exception as e:
                # Best effort: one bad region must not abort the remaining crops.
                print(f"Error cropping region {idx + 1} from {image_path}: {e}")

    return cropped_data
|
| 74 |
+
|
| 75 |
+
def process_folders_to_csv_and_crop(base_folder: str, output_csv: str, cropped_images_folder: str):
    """Crop all annotated regions under ``base_folder`` and list them in one CSV.

    Only sub-directories whose name starts with ``dr`` are processed, and each must
    contain an annotation file named ``<folder>.json``; others are skipped.

    Args:
        base_folder: Directory holding the ``dr*`` folders.
        output_csv: Path of the CSV to write (overwritten if present).
        cropped_images_folder: Directory for the crops; created when missing.
    """
    os.makedirs(cropped_images_folder, exist_ok=True)

    # Header first; one row per successfully cropped region follows.
    rows = [["Cropped Image Path", "Text","type","language"]]

    for entry in os.listdir(base_folder):
        entry_path = os.path.join(base_folder, entry)
        is_dr_folder = os.path.isdir(entry_path) and entry.startswith("dr")
        if not is_dr_folder:
            continue

        annotation_file = os.path.join(entry_path, f"{entry}.json")
        if not os.path.exists(annotation_file):
            print(f"Skipping {entry_path}: No JSON file found.")
            continue

        with open(annotation_file, "r", encoding="utf-8") as handle:
            raw_annotations = json.load(handle)
        annotations = AnnotatedData(raw_annotations)

        for image_meta in annotations.metadata.values():
            source_image = os.path.join(entry_path, image_meta.filename)
            stem, _extension = os.path.splitext(image_meta.filename)
            rows += crop_and_save_regions(
                source_image, image_meta.regions, cropped_images_folder, stem
            )

    with open(output_csv, "w", newline="", encoding="utf-8") as handle:
        csv.writer(handle).writerows(rows)

    print(f"CSV file created: {output_csv}")
|
| 116 |
+
|
| 117 |
+
def clean_second_column(overall_output_csv, output_file):
    """Write a cleaned copy of the CSV, then delete the original.

    For every row with at least two columns: commas and double quotes are
    removed from the second column and newlines become spaces; in the first
    column ``./`` is removed and backslashes become forward slashes. Rows whose
    cleaned second column is blank, and rows with fewer than two columns, are
    dropped.

    Args:
        overall_output_csv: Input CSV path; removed after the copy is written.
        output_file: Destination path for the cleaned CSV.
    """
    with open(overall_output_csv, mode='r', newline='', encoding='utf-8') as source, \
            open(output_file, mode='w', newline='', encoding='utf-8') as destination:
        emit = csv.writer(destination).writerow

        for record in csv.reader(source):
            if len(record) <= 1:
                continue  # no second column to clean

            cleaned_text = record[1]
            for unwanted, substitute in ((',', ''), ('"', ''), ('\n', ' ')):
                cleaned_text = cleaned_text.replace(unwanted, substitute)
            record[1] = cleaned_text

            record[0] = record[0].replace("./","").replace("\\","/")

            if cleaned_text.strip():
                emit(record)

    # Removed only after both files are closed.
    os.remove(overall_output_csv)
|
| 131 |
+
|
| 132 |
+
# Usage example: runs at import time with paths relative to the working directory.
base_folder = "./base_data"  # directory containing the dr* folders
overall_output_csv = "./all_cropped_data.csv"  # intermediate CSV (deleted by the cleaning step)
overall_output_csv_cleaned = "./all_cropped_data_cleaned.csv"  # final cleaned CSV
cropped_images_folder = "./all_cropped_images"  # receives every cropped image

# Step 1: crop every annotated region and collect the rows.
process_folders_to_csv_and_crop(base_folder, overall_output_csv, cropped_images_folder)
# Step 2: write the cleaned CSV and remove the intermediate one.
clean_second_column(overall_output_csv,overall_output_csv_cleaned)
|
README.md
CHANGED
|
@@ -1,12 +1,6 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
|
| 4 |
-
colorFrom: pink
|
| 5 |
-
colorTo: green
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.16.0
|
| 8 |
-
app_file: app.py
|
| 9 |
-
pinned: false
|
| 10 |
---
|
| 11 |
-
|
| 12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Doctor_Handwriting_Text_Detection
|
| 3 |
+
app_file: predict.py
|
|
|
|
|
|
|
| 4 |
sdk: gradio
|
| 5 |
sdk_version: 5.16.0
|
|
|
|
|
|
|
| 6 |
---
|
|
|
|
|
|
RemoveNotCorrectImages..py
ADDED
|
@@ -0,0 +1,279 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import pandas as pd
|
| 3 |
+
def delete_images_with_patterns(directory: str, patterns: list):
    """Delete files whose name contains ``"dr" + pattern`` for any given pattern.

    The check is a plain substring test, so pattern ``"1_1"`` also matches names
    such as ``dr1_10.jpg``.

    Args:
        directory (str): Directory to scan (not recursive). A missing directory
            is reported and nothing is deleted.
        patterns (list): Suffixes that are prefixed with ``"dr"`` before matching.
    """
    if not os.path.exists(directory):
        print(f"Directory '{directory}' does not exist.")
        return

    for name in os.listdir(directory):
        target = os.path.join(directory, name)

        matched = False
        for pattern in patterns:
            if "dr" + pattern in name:
                matched = True
                break
        if not matched:
            continue

        try:
            os.remove(target)
            print(f"Deleted: {target}")
        except Exception as e:
            # Keep going: one undeletable entry should not stop the sweep.
            print(f"Error deleting {target}: {e}")
|
| 25 |
+
|
| 26 |
+
def clean_csv(csv_path: str, patterns: list):
    """Drop CSV rows whose first column names a rejected crop, then de-duplicate.

    Each pattern ``p`` is turned into ``dr<p>``. A row is removed when its first
    column is exactly that name, or that name followed by an underscore and
    digits (``"1_1"`` removes ``dr1_1`` and ``dr1_1_3`` but keeps ``dr1_10``).
    The file is rewritten in place.

    Args:
        csv_path: CSV file to clean. A missing file is reported and left alone.
        patterns: Name suffixes without the ``dr`` prefix. An empty list removes
            no rows by pattern; duplicates are still dropped.
    """
    import re  # local import: only this function needs it

    if not os.path.exists(csv_path):
        print(f"CSV file '{csv_path}' does not exist.")
        return

    df = pd.read_csv(csv_path)

    # Match on text even when pandas inferred a non-string dtype.
    df.iloc[:, 0] = df.iloc[:, 0].astype(str)

    # Escape each name so regex metacharacters in a pattern are matched literally;
    # dict.fromkeys drops repeated patterns while keeping their order.
    escaped_names = [re.escape(f"dr{p}") for p in dict.fromkeys(patterns)]

    # Without this guard an empty list yields the regex "", which matches every
    # row and would empty the whole file.
    if escaped_names:
        combined_regex = r"^(?:" + "|".join(escaped_names) + r")(?:_\d+)?$"
        df = df[~df.iloc[:, 0].str.match(combined_regex, na=False)]

    # Non-inplace form avoids mutating a filtered view of the original frame.
    df = df.drop_duplicates()

    df.to_csv(csv_path, index=False)
    print(f"Updated CSV saved: {csv_path}")
|
| 62 |
+
|
| 63 |
+
# Crop identifiers (without the "dr" prefix) flagged for removal, grouped by
# their leading number. This list is only used by the commented-out call below.
patterns_to_delete = [
    "1_1",
    "4_1", "4_2", "4_3", "4_4", "4_5", "4_6",
    "5_1", "5_2",
    "7_1",
    "10_1",
    "24_1", "24_2",
    "25_1", "25_2",
    "29_1",
    "30_1",
    "33_1",
    "36_1", "36_4", "36_5", "36_6",
    "38_1", "38_2", "38_3", "38_4", "38_5", "38_6", "38_7", "38_8", "38_9",
    "42_1", "42_2", "42_4",
    "43_1", "43_2", "43_3", "43_4", "43_5",
    "44_1", "44_2", "44_3", "44_4", "44_6",
    "45_1",
    "47_1",
    "50_1",
    "57_1", "57_2",
    "63_1",
    "64_1", "64_2", "64_3", "64_4", "64_5", "64_6", "64_7", "64_8", "64_9",
    "65_1", "65_2",
    "66_1", "66_2", "66_3", "66_4", "66_5", "66_6", "66_7", "66_8",
    "69_1", "69_2", "69_3", "69_4", "69_5", "69_6", "69_7", "69_8", "69_9",
    "71_1", "71_2", "71_3", "71_4", "71_5",
    "73_1",
    "74_1",
    "75_1", "75_2", "75_3", "75_4", "75_5", "75_6",
    "77_1", "77_2", "77_3",
    "76_1", "76_2", "76_3", "76_4", "76_5",
    "80_1", "80_2",
    "82_1",
    "86_1", "86_2", "86_3", "86_4", "86_5",
    "87_1", "87_2", "87_3", "87_4", "87_5", "87_6",
    "89_1",
    "92_1", "92_2",
    "93_1",
    "94_2", "94_1",
    "95_1",
    "97_1", "97_2",
    "102_1",
    "104_1",
    "108_1",
    "109_1",
    "112_1",
    "114_1", "114_2", "114_3", "114_4", "114_5", "114_6", "114_7", "114_8", "114_9",
    "115_1", "115_2",
    "116_1", "116_2", "116_3",
    "117_1",
    "128_1",
    "130_1",
    "132_1", "132_2", "132_3",
    "137_1", "137_2", "137_3", "137_4", "137_5", "137_6", "137_7", "137_8", "137_9",
    "140_5",
    "146_1", "146_2", "146_3",
    "151_1", "151_2",
    "163_1",
    "169_1",
    "173_1", "173_2",
    "100_1",
]

# Specify your target directory
target_directory = "./cropped_images"  # Change this to your actual directory

# Run the deletion function
# delete_images_with_patterns(target_directory, patterns_to_delete)

# Second pass: names to strip from the CSV. This rebinds patterns_to_delete.
# NOTE(review): these entries already start with "dr", and clean_csv prepends
# "dr" to every pattern again, so as written they become "drdr..." -- confirm
# whether the prefix should be dropped here.
patterns_to_delete = [
    "dr80_2", "dr80_3",
    "dr81_1", "dr81_1", "dr81_2",
    "dr83_1",
    "dr86_1", "dr86_2", "dr86_3", "dr86_4", "dr86_5",
    "dr87_1", "dr87_2", "dr87_3", "dr87_4", "dr87_5", "dr87_6",
    "dr88_1",
    "dr89_1", "dr89_2",
    "dr9_1",
    "dr90_1",
    "dr92_1", "dr92_1", "dr92_2", "dr92_3",
    "dr93_1", "dr93_2",
    "dr94_1", "dr94_2", "dr94_3",
    "dr95_1", "dr95_2",
    "dr96_1",
    "dr97_1", "dr97_2", "dr97_3",
    "dr98_1",
]
clean_csv("all_cropped_data.csv",patterns=patterns_to_delete)
|
__pycache__/AnnotatedData.cpython-312.pyc
ADDED
|
Binary file (1.36 kB). View file
|
|
|
__pycache__/ImageMetadata.cpython-312.pyc
ADDED
|
Binary file (1.61 kB). View file
|
|
|
__pycache__/Region.cpython-312.pyc
ADDED
|
Binary file (1.96 kB). View file
|
|
|
__pycache__/RegionAttributes.cpython-312.pyc
ADDED
|
Binary file (2.07 kB). View file
|
|
|
__pycache__/ShapeAttributes.cpython-312.pyc
ADDED
|
Binary file (1.24 kB). View file
|
|
|
model/text_type_lang_model.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e8a157f2b6cfbdf954a1fd867f7b4edb9fd92b1dbe33c4dbaa16fdd1e1bc671f
|
| 3 |
+
size 885270680
|
predict.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import numpy as np
|
| 3 |
+
import tensorflow as tf
|
| 4 |
+
from keras.api.models import load_model
|
| 5 |
+
from PIL import Image
|
| 6 |
+
import string
|
| 7 |
+
import pandas as pd
|
| 8 |
+
|
| 9 |
+
class TextTypeLangModel:
    """Wrapper around the saved model that predicts text, type and language."""

    def __init__(self, model_path, csv_path=None):
        """Load the model and build the character/index lookup tables.

        Args:
            model_path: Path handed to ``load_model``.
            csv_path: Accepted but not used by this class.
        """
        self.model = load_model(model_path)

        # Alphabet: ASCII letters, digits, space, then the Urdu character runs.
        self.characters = (
            string.ascii_letters + string.digits + " "
            + "آبپچڈڑڤکگہہٹژزسٹطظعغفقکگلاںمںنۓہھوؤ"
            + "ےیئؤٹپجچحخدڈذرزسشصضطظعغفقکلمنوٕں"
            + "ۓۓہ۔،؛؟"
        )

        self.num_chars = len(self.characters) + 1  # one extra slot for blank (index 0)

        # Indices start at 1. The alphabet contains repeated characters, so a
        # repeated character ends up mapped to its last position.
        self.char_to_index = {}
        self.index_to_char = {}
        for position, symbol in enumerate(self.characters, start=1):
            self.char_to_index[symbol] = position
            self.index_to_char[position] = symbol

    def encode_text(self, text, max_len=10):
        """Return ``max_len`` character indices for ``text``.

        The text is cut to ``max_len`` and right-padded with spaces; characters
        missing from the lookup table are encoded as 0.
        """
        fixed_width = text[:max_len].ljust(max_len)
        lookup = self.char_to_index.get
        return [lookup(symbol, 0) for symbol in fixed_width]

    def preprocess_image(self, image):
        """Convert a PIL image to a 128x128 RGB array scaled by 1/255."""
        rgb_image = image.convert("RGB")
        resized = rgb_image.resize((128, 128))
        return np.array(resized) / 255.0

    def predict(self, image):
        """Run the model on one PIL image.

        Returns:
            ``(text, type_index, language_index)``: the decoded, stripped text and
            the argmax of the type and language outputs.
        """
        pixels = self.preprocess_image(image)
        batch = np.expand_dims(pixels, axis=0)  # add the batch dimension
        text_scores, type_scores, lang_scores = self.model.predict(batch)

        # Decode the first 10 positions of the first sample; indices without a
        # character (such as 0) contribute nothing.
        decoded = []
        for position in range(10):
            best_index = np.argmax(text_scores[0][position])
            decoded.append(self.index_to_char.get(best_index, ''))
        predicted_text = ''.join(decoded)

        return predicted_text.strip(), np.argmax(type_scores), np.argmax(lang_scores)
|
| 42 |
+
|
| 43 |
+
def get_type_string(int_type):
    """Return the display label for a type code, or "Unknown" for any other value."""
    labels = (
        "Medicine",
        "Dosage",
        "Diagnostic",
        "Symptoms",
        "Personal Info",
        "Numeric Data",
        "Text",
    )
    # Codes are the positions 0..6 of the labels above.
    label_by_code = dict(enumerate(labels))
    return label_by_code.get(int_type, "Unknown")
|
| 54 |
+
|
| 55 |
+
_MODEL_PATH = "./model/text_type_lang_model.h5"
_loaded_model = None  # populated on the first prediction and reused afterwards


def _get_model():
    """Return the shared TextTypeLangModel, loading it from disk on first use."""
    global _loaded_model
    if _loaded_model is None:
        _loaded_model = TextTypeLangModel(_MODEL_PATH)
    return _loaded_model


def predict_text_type_lang(image):
    """Predict text, type label and language name for one uploaded image.

    The model is loaded once and cached; reloading the weights file on every
    request made each prediction pay the full load cost.

    Args:
        image: PIL image supplied by the Gradio input component.

    Returns:
        ``(predicted_text, type_label, language_name)``, where the language is
        "English" for index 0 and "Urdu" otherwise.
    """
    model = _get_model()
    predicted_text, predicted_type, predicted_language = model.predict(image)
    predicted_type_str = get_type_string(predicted_type)
    predicted_language_str = "English" if predicted_language == 0 else "Urdu"

    return predicted_text, predicted_type_str, predicted_language_str
|
| 62 |
+
|
| 63 |
+
# Gradio UI: one PIL image in, three text fields out (text, type, language).
iface = gr.Interface(
    title="Text Type & Language Prediction",
    description="Upload an image to predict the extracted text, type, and language.",
    fn=predict_text_type_lang,
    inputs=gr.Image(type="pil"),
    outputs=["text", "text", "text"],
)

# Starts the app as soon as this module is executed.
iface.launch(debug=True)
|
requirements.txt
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
arabic-reshaper==3.0.0
|
| 2 |
+
asttokens==3.0.0
|
| 3 |
+
comm==0.2.2
|
| 4 |
+
debugpy==1.8.12
|
| 5 |
+
decorator==5.1.1
|
| 6 |
+
executing==2.1.0
|
| 7 |
+
ipykernel==6.29.5
|
| 8 |
+
ipython==8.31.0
|
| 9 |
+
jedi==0.19.2
|
| 10 |
+
jupyter_client==8.6.3
|
| 11 |
+
jupyter_core==5.7.2
|
| 12 |
+
Mako==1.3.6
|
| 13 |
+
MarkupSafe==3.0.2
|
| 14 |
+
matplotlib-inline==0.1.7
|
| 15 |
+
meson==1.6.0
|
| 16 |
+
nest-asyncio==1.6.0
|
| 17 |
+
parso==0.8.4
|
| 18 |
+
pillow==11.0.0
|
| 19 |
+
prompt_toolkit==3.0.48
|
| 20 |
+
pure_eval==0.2.3
|
| 21 |
+
pybind11==2.13.6
|
| 22 |
+
Pygments==2.19.1
|
| 23 |
+
python-bidi==0.6.3
|
| 24 |
+
pywin32==308
|
| 25 |
+
pyzmq==26.2.0
|
| 26 |
+
setuptools==75.6.0
|
| 27 |
+
stack-data==0.6.3
|
| 28 |
+
tornado==6.4.2
|
| 29 |
+
traitlets==5.14.3
|
| 30 |
+
wcwidth==0.2.13
|
| 31 |
+
accelerate==1.2.1
|
| 32 |
+
acres==0.2.0
|
| 33 |
+
aiofiles==24.1.0
|
| 34 |
+
aiohappyeyeballs==2.4.4
|
| 35 |
+
aiohttp==3.11.11
|
| 36 |
+
aiosignal==1.3.2
|
| 37 |
+
annotated-types==0.7.0
|
| 38 |
+
antlr4-python3-runtime==4.9.3
|
| 39 |
+
anyio==4.8.0
|
| 40 |
+
attrs==25.1.0
|
| 41 |
+
certifi==2024.12.14
|
| 42 |
+
charset-normalizer==3.4.1
|
| 43 |
+
ci-info==0.3.0
|
| 44 |
+
click==8.1.8
|
| 45 |
+
colorama==0.4.6
|
| 46 |
+
configobj==5.0.9
|
| 47 |
+
configparser==7.1.0
|
| 48 |
+
contourpy==1.3.1
|
| 49 |
+
cycler==0.12.1
|
| 50 |
+
datasets==3.2.0
|
| 51 |
+
diffusers==0.32.1
|
| 52 |
+
dill==0.3.8
|
| 53 |
+
docker-pycreds==0.4.0
|
| 54 |
+
efficientnet_pytorch==0.7.1
|
| 55 |
+
einops==0.8.0
|
| 56 |
+
etelemetry==0.3.1
|
| 57 |
+
evaluate==0.4.3
|
| 58 |
+
filelock==3.16.1
|
| 59 |
+
fonttools==4.55.3
|
| 60 |
+
frontend==0.0.3
|
| 61 |
+
frozenlist==1.5.0
|
| 62 |
+
fsspec==2024.9.0
|
| 63 |
+
gitdb==4.0.12
|
| 64 |
+
GitPython==3.1.44
|
| 65 |
+
h11==0.14.0
|
| 66 |
+
httplib2==0.22.0
|
| 67 |
+
huggingface-hub==0.27.1
|
| 68 |
+
idna==3.10
|
| 69 |
+
imageio==2.36.1
|
| 70 |
+
importlib_metadata==8.5.0
|
| 71 |
+
isodate==0.6.1
|
| 72 |
+
itsdangerous==2.2.0
|
| 73 |
+
Jinja2==3.1.5
|
| 74 |
+
jiwer==3.1.0
|
| 75 |
+
joblib==1.4.2
|
| 76 |
+
kiwisolver==1.4.8
|
| 77 |
+
lazy_loader==0.4
|
| 78 |
+
looseversion==1.3.0
|
| 79 |
+
lxml==5.3.0
|
| 80 |
+
matplotlib==3.10.0
|
| 81 |
+
mpmath==1.3.0
|
| 82 |
+
multidict==6.1.0
|
| 83 |
+
multiprocess==0.70.16
|
| 84 |
+
munch==4.0.0
|
| 85 |
+
natsort==8.4.0
|
| 86 |
+
networkx==3.4.2
|
| 87 |
+
nibabel==5.3.2
|
| 88 |
+
nipype==1.9.2
|
| 89 |
+
numpy==2.2.1
|
| 90 |
+
omegaconf==2.3.0
|
| 91 |
+
opencv-python==4.10.0.84
|
| 92 |
+
packaging==24.2
|
| 93 |
+
pandas==2.2.3
|
| 94 |
+
pathlib==1.0.1
|
| 95 |
+
pip==24.3.1
|
| 96 |
+
platformdirs==4.3.6
|
| 97 |
+
plotly==5.24.1
|
| 98 |
+
pretrainedmodels==0.7.4
|
| 99 |
+
propcache==0.2.1
|
| 100 |
+
protobuf==5.29.3
|
| 101 |
+
prov==2.0.1
|
| 102 |
+
psutil==6.1.1
|
| 103 |
+
puremagic==1.28
|
| 104 |
+
pyarrow==19.0.0
|
| 105 |
+
pydantic==2.10.5
|
| 106 |
+
pydantic_core==2.27.2
|
| 107 |
+
pydot==3.0.4
|
| 108 |
+
PyMuPDF==1.25.2
|
| 109 |
+
PyMySQL==1.1.1
|
| 110 |
+
pyparsing==3.2.1
|
| 111 |
+
python-dateutil==2.9.0.post0
|
| 112 |
+
pytz==2024.2
|
| 113 |
+
pyxnat==1.6.2
|
| 114 |
+
PyYAML==6.0.2
|
| 115 |
+
RapidFuzz==3.12.1
|
| 116 |
+
rdflib==6.3.2
|
| 117 |
+
regex==2024.11.6
|
| 118 |
+
requests==2.32.3
|
| 119 |
+
safetensors==0.5.2
|
| 120 |
+
scikit-image==0.25.0
|
| 121 |
+
scikit-learn==1.6.1
|
| 122 |
+
scipy==1.15.1
|
| 123 |
+
segmentation_models_pytorch==0.4.0
|
| 124 |
+
sentence-transformers==3.1.1
|
| 125 |
+
sentry-sdk==2.19.2
|
| 126 |
+
setproctitle==1.3.4
|
| 127 |
+
simplejson==3.19.3
|
| 128 |
+
six==1.17.0
|
| 129 |
+
smmap==5.0.2
|
| 130 |
+
sniffio==1.3.1
|
| 131 |
+
starlette==0.45.3
|
| 132 |
+
sympy==1.13.1
|
| 133 |
+
tenacity==9.0.0
|
| 134 |
+
threadpoolctl==3.5.0
|
| 135 |
+
tifffile==2025.1.10
|
| 136 |
+
timm==1.0.13
|
| 137 |
+
tokenizers==0.20.3
|
| 138 |
+
torch==2.5.1
|
| 139 |
+
torchvision==0.20.1
|
| 140 |
+
tqdm==4.67.1
|
| 141 |
+
traits==7.0.2
|
| 142 |
+
transformers==4.45.2
|
| 143 |
+
typing_extensions==4.12.2
|
| 144 |
+
tzdata==2025.1
|
| 145 |
+
unet==0.8.1
|
| 146 |
+
urllib3==2.3.0
|
| 147 |
+
uvicorn==0.34.0
|
| 148 |
+
wandb==0.19.2
|
| 149 |
+
xxhash==3.5.0
|
| 150 |
+
yarl==1.18.3
|
| 151 |
+
zipp==3.21.0
|
train_from_scratch.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
import tensorflow as tf
|
| 4 |
+
from keras import Model, Input
|
| 5 |
+
from keras.api.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Reshape, TimeDistributed
|
| 6 |
+
from sklearn.model_selection import train_test_split
|
| 7 |
+
from keras.api.utils import to_categorical
|
| 8 |
+
from PIL import Image
|
| 9 |
+
import string
|
| 10 |
+
|
| 11 |
+
class TextTypeLangModel:
    """CNN that jointly predicts a short text string, a type and a language.

    The training table is read from a CSV with the columns
    ``'Cropped Image Path'``, ``'Text'``, ``'type'`` and ``'language'``.
    The network has three softmax heads named ``text_output``,
    ``type_output`` and ``lang_output``; all three emit one distribution per
    time step, i.e. tensors shaped ``(batch, MAX_TEXT_LEN, n_classes)``.
    """

    # Geometry shared by preprocessing, the network and the label encoding.
    IMAGE_SIZE = (128, 128)      # (width, height) passed to PIL's resize
    MAX_TEXT_LEN = 10            # number of characters / time steps
    FEATURES_PER_STEP = 128      # feature width of every time step
    NUM_TYPES = 7
    NUM_LANGS = 2

    def __init__(self, csv_path):
        """Load the annotation CSV, build the alphabet and compile the model.

        Args:
            csv_path: Path of the CSV file handed to ``pandas.read_csv``.
        """
        self.df = pd.read_csv(csv_path)
        # NOTE: the alphabet contains repeated characters. It is kept
        # verbatim because ``num_chars`` (and with it the width of the text
        # head) is derived from its length; for a repeated character the
        # last position wins in ``char_to_index``.
        self.characters = string.ascii_letters + string.digits + " " + \
            "آبپچڈڑڤکگہہٹژزسٹطظعغفقکگلاںمںنۓہھوؤ" + \
            "ےیئؤٹپجچحخدڈذرزسشصضطظعغفقکلمنوٕں" + \
            "ۓۓہ۔،؛؟"

        self.num_chars = len(self.characters) + 1  # Extra for blank
        # Index 0 is reserved for "blank / unknown"; real characters start at 1.
        self.char_to_index = {c: i + 1 for i, c in enumerate(self.characters)}
        self.index_to_char = {i + 1: c for i, c in enumerate(self.characters)}
        self.model = self.build_model()

    def encode_text(self, text, max_len=MAX_TEXT_LEN):
        """Turn ``text`` into a fixed-length list of character indices.

        The text is truncated to ``max_len`` characters and right-padded with
        spaces. Characters outside the alphabet map to index 0.

        Args:
            text: The label text. Missing values (``None`` / NaN, as produced
                by pandas for empty cells) are treated as an empty string and
                other non-string values are converted with ``str``.
            max_len: Length of the returned sequence.

        Returns:
            A list of ``max_len`` integers.
        """
        if not isinstance(text, str):
            # An empty CSV cell arrives as float NaN; slicing it would raise.
            text = "" if pd.isna(text) else str(text)
        padded = text[:max_len].ljust(max_len)
        return [self.char_to_index.get(c, 0) for c in padded]

    def preprocess_image(self, image_path):
        """Load an image as a normalised RGB array.

        Args:
            image_path: Path of the image file to open.

        Returns:
            The image resized to ``IMAGE_SIZE``, converted to RGB and divided
            by 255.0.
        """
        # The context manager releases the file handle even if decoding fails.
        with Image.open(image_path) as raw:
            resized = raw.convert("RGB").resize(self.IMAGE_SIZE)
        return np.array(resized) / 255.0  # Normalize

    def prepare_data(self):
        """Build the feature/label arrays and split them into train and test.

        Returns:
            The 8-element list returned by ``train_test_split`` for
            ``(images, one-hot text, one-hot type, one-hot language)`` with a
            20 % test share and ``random_state=42``.
        """
        X_images = np.array(
            [self.preprocess_image(path) for path in self.df['Cropped Image Path']]
        )
        y_text = np.array([self.encode_text(txt) for txt in self.df['Text']])
        y_type = to_categorical(self.df['type'].values, num_classes=self.NUM_TYPES)
        y_lang = to_categorical(self.df['language'].values, num_classes=self.NUM_LANGS)
        return train_test_split(
            X_images,
            to_categorical(y_text, self.num_chars),
            y_type,
            y_lang,
            test_size=0.2,
            random_state=42,
        )

    def build_model(self):
        """Create and compile the three-headed network.

        Returns:
            A compiled Keras ``Model`` with the outputs
            ``[text_output, type_output, lang_output]``.
        """
        width, height = self.IMAGE_SIZE
        steps = self.MAX_TEXT_LEN
        step_features = self.FEATURES_PER_STEP

        input_layer = Input(shape=(height, width, 3))
        x = Conv2D(32, (3, 3), activation='relu')(input_layer)
        x = MaxPooling2D()(x)
        x = Conv2D(64, (3, 3), activation='relu')(x)
        x = MaxPooling2D()(x)
        x = Flatten()(x)

        # The dense layer must produce exactly steps * step_features units so
        # that it can be folded into (steps, step_features) below.
        x = Dense(steps * step_features, activation='relu')(x)
        x = Reshape((steps, step_features))(x)

        # One character distribution per time step. The Reshape keeps the
        # shape and only serves to give the head its name.
        per_step_chars = TimeDistributed(
            Dense(self.num_chars, activation='softmax')
        )(x)
        text_output = Reshape((steps, self.num_chars), name='text_output')(per_step_chars)

        # These heads are applied to the 3-D tensor, so they also emit one
        # distribution per time step: (batch, steps, n_classes).
        output_type = Dense(self.NUM_TYPES, activation='softmax', name='type_output')(x)
        output_lang = Dense(self.NUM_LANGS, activation='softmax', name='lang_output')(x)

        model = Model(inputs=input_layer, outputs=[text_output, output_type, output_lang])
        model.compile(
            optimizer='adam',
            loss=['categorical_crossentropy', 'categorical_crossentropy', 'categorical_crossentropy'],
            metrics={'text_output': 'accuracy', 'type_output': 'accuracy', 'lang_output': 'accuracy'},
        )
        return model

    def _repeat_over_steps(self, labels):
        """Copy per-sample one-hot labels to every time step: (n, c) -> (n, steps, c)."""
        return np.repeat(np.expand_dims(labels, axis=1), self.MAX_TEXT_LEN, axis=1)

    @staticmethod
    def _vote_class(probabilities):
        """Return the class index of the first sample in a prediction batch.

        A per-time-step prediction ``(1, steps, classes)`` is averaged over
        the steps first, so the result is always a class index and never an
        index into the flattened array.
        """
        scores = np.asarray(probabilities)[0]
        if scores.ndim > 1:
            scores = scores.mean(axis=0)
        return np.argmax(scores)

    def train(self, epochs=10, batch_size=32):
        """Fit the model and save it as ``text_type_lang_model.h5``.

        Args:
            epochs: Number of training epochs.
            batch_size: Mini-batch size.
        """
        (X_train, X_test,
         y_train_text, y_test_text,
         y_train_type, y_test_type,
         y_train_lang, y_test_lang) = self.prepare_data()

        # The type/language heads predict once per time step, so their labels
        # are repeated along the time axis to match (batch, steps, classes).
        y_train_type = self._repeat_over_steps(y_train_type)
        y_test_type = self._repeat_over_steps(y_test_type)
        y_train_lang = self._repeat_over_steps(y_train_lang)
        y_test_lang = self._repeat_over_steps(y_test_lang)

        self.model.fit(
            X_train, [y_train_text, y_train_type, y_train_lang],
            validation_data=(X_test, [y_test_text, y_test_type, y_test_lang]),
            epochs=epochs, batch_size=batch_size,
        )
        self.model.save("text_type_lang_model.h5")

    def predict(self, image_path):
        """Predict text, type and language for a single image.

        Args:
            image_path: Path of the image to classify.

        Returns:
            A tuple ``(text, type_index, language_index)`` where ``text`` is
            the decoded string with surrounding whitespace stripped and the
            two indices are class ids.
        """
        image = self.preprocess_image(image_path)
        image = np.expand_dims(image, axis=0)
        pred_text, pred_type, pred_lang = self.model.predict(image)

        # Index 0 (blank/unknown) has no character and decodes to ''.
        step_indices = np.argmax(pred_text[0], axis=-1)
        decoded = ''.join(self.index_to_char.get(idx, '') for idx in step_indices)

        # A bare np.argmax over the (1, steps, classes) outputs would return a
        # position in the flattened array; reduce over the time axis instead.
        return decoded.strip(), self._vote_class(pred_type), self._vote_class(pred_lang)
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
# Build the model from the cleaned crop table and fit it.
model = TextTypeLangModel("all_cropped_data_cleaned.csv")
model.train()

# Smoke test: run a prediction on the first image listed in the dataset.
first_image_path = model.df['Cropped Image Path'].iloc[0]
prediction = model.predict(first_image_path)
predicted_text, predicted_type, predicted_language = prediction

# Language index 0 is reported as English, anything else as Urdu.
if predicted_language == 0:
    language_name = "English"
else:
    language_name = "Urdu"

print("Predicted Text:", predicted_text)
print("Predicted Type:", predicted_type)
print("Predicted Language:", language_name)
|