Umer2762 committed on
Commit
5626a1a
·
verified ·
1 Parent(s): b46785e

Upload folder using huggingface_hub

Browse files
DataModels/AnnotatedData.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict, Optional
2
+ from .ImageMetadata import ImageMetadata
3
+
4
class AnnotatedData:
    """Top-level wrapper around a VIA (VGG Image Annotator) export dict.

    Exposes the ordered image id list and a mapping from image id to
    parsed ImageMetadata.
    """

    def __init__(self, data: Dict):
        """Parse a raw VIA export.

        Args:
            data: VIA JSON dict. Relevant keys are "_via_image_id_list"
                and "_via_img_metadata"; both are treated as optional.
        """
        self.image_ids = data.get("_via_image_id_list", [])
        # Use .get() so an export missing "_via_img_metadata" yields an
        # empty mapping instead of raising KeyError — the original indexed
        # data["_via_img_metadata"] directly, inconsistently with the
        # .get() used for the id list above.
        img_metadata = data.get("_via_img_metadata", {})
        self.metadata = {}
        for img_id in self.image_ids:
            meta = img_metadata.get(img_id)
            if meta is None:
                # Id listed but no metadata entry: skip, matching the
                # original's "if img_id in ..." filter.
                continue
            self.metadata[img_id] = ImageMetadata(
                meta["filename"],
                meta["size"],
                meta.get("regions", []),
            )

    def __repr__(self):
        return f"AnnotatedData(Images={list(self.metadata.keys())})"
DataModels/ImageMetadata.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict
2
+ from .Region import Region
3
+
4
class ImageMetadata:
    """Metadata for one annotated image: file name, byte size, and the
    list of annotated regions (wrapped as Region objects)."""

    def __init__(self, filename: str, size: int, regions: List[Dict]):
        self.filename = filename
        self.size = size
        # Wrap each raw region dict; missing attribute dicts default to {}.
        self.regions = []
        for raw_region in regions:
            shape = raw_region.get("shape_attributes", {})
            attrs = raw_region.get("region_attributes", {})
            self.regions.append(Region(shape, attrs))

    def to_dict(self):
        """Serialize back to a plain dict (regions converted recursively)."""
        serialized_regions = [region.to_dict() for region in self.regions]
        return {
            "filename": self.filename,
            "size": self.size,
            "regions": serialized_regions,
        }

    def __repr__(self):
        return f"ImageMetadata(Filename={self.filename}, Size={self.size}, Regions={self.regions})"
DataModels/Region.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict
2
+ from .ShapeAttributes import ShapeAttributes
3
+ from .RegionAttributes import RegionAttributes
4
+
5
class Region:
    """One annotated region: a bounding box plus its label attributes."""

    def __init__(self, shape_attributes: Dict, region_attributes: Dict):
        sa = shape_attributes
        self.shape_attributes = ShapeAttributes(
            sa.get("x"),
            sa.get("y"),
            sa.get("width"),
            sa.get("height"),
        )
        ra = region_attributes
        # NOTE: "Dignostic" is the (misspelled) key actually present in the
        # annotation files, so it must be kept verbatim.
        self.region_attributes = RegionAttributes(
            ra.get("Language", "English"),
            ra.get("Dosage", ""),
            ra.get("Dignostic", ""),
            ra.get("Symptoms", ""),
            ra.get("Medicine Name", ""),
            ra.get("Text", ""),
            ra.get("Personal Information", "N/A"),
            ra.get("Numeric Data", "N/A"),
        )

    def to_dict(self):
        """Serialize to nested plain dicts."""
        return {
            "shape_attributes": self.shape_attributes.to_dict(),
            "region_attributes": self.region_attributes.to_dict(),
        }

    def __repr__(self):
        return f"Region({self.shape_attributes}, {self.region_attributes})"
DataModels/RegionAttributes.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class RegionAttributes:
    """Label attributes attached to one region of a prescription image."""

    def __init__(self, language: str, dosage: str, dignostic: str, symptoms: str, medicine_name: str, text: str, personal_info: str, numeric_data: str):
        self.language = language
        self.dosage = dosage
        # "dignostic" spelling intentionally mirrors the annotation key.
        self.dignostic = dignostic
        self.symptoms = symptoms
        self.medicine_name = medicine_name
        self.text = text
        self.personal_info = personal_info
        self.numeric_data = numeric_data

    def to_dict(self):
        """Serialize using the original annotation-tool key spellings."""
        pairs = (
            ("Language", self.language),
            ("Dosage", self.dosage),
            ("Dignostic", self.dignostic),
            ("Symptoms", self.symptoms),
            ("Medicine Name", self.medicine_name),
            ("Text", self.text),
            ("Personal Information", self.personal_info),
            ("Numeric Data", self.numeric_data),
        )
        return dict(pairs)

    def __repr__(self):
        return (
            f"RegionAttributes(Language={self.language}, Dosage={self.dosage}, "
            f"Dignostic={self.dignostic}, Symptoms={self.symptoms}, "
            f"Medicine Name={self.medicine_name}, Text={self.text}, "
            f"Personal Info={self.personal_info}, Numeric Data={self.numeric_data})"
        )
DataModels/ShapeAttributes.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class ShapeAttributes:
    """Axis-aligned bounding box of a region (top-left corner plus size)."""

    def __init__(self, x: int, y: int, width: int, height: int):
        self.x = x
        self.y = y
        self.width = width
        self.height = height

    def to_dict(self):
        """Serialize to a plain dict with the VIA shape keys."""
        return dict(x=self.x, y=self.y, width=self.width, height=self.height)

    def __repr__(self):
        return f"ShapeAttributes(x={self.x}, y={self.y}, width={self.width}, height={self.height})"
DataModels/__pycache__/AnnotatedData.cpython-312.pyc ADDED
Binary file (1.38 kB). View file
 
DataModels/__pycache__/ImageMetadata.cpython-312.pyc ADDED
Binary file (1.63 kB). View file
 
DataModels/__pycache__/Region.cpython-312.pyc ADDED
Binary file (1.97 kB). View file
 
DataModels/__pycache__/RegionAttributes.cpython-312.pyc ADDED
Binary file (2.08 kB). View file
 
DataModels/__pycache__/ShapeAttributes.cpython-312.pyc ADDED
Binary file (1.25 kB). View file
 
OutputMetadataCreator.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import csv
3
+ import json
4
+ from PIL import Image
5
+ from DataModels.AnnotatedData import AnnotatedData
6
+ from DataModels.Region import Region
7
+
8
def crop_and_save_regions(image_path: str, regions: "list[Region]", output_folder: str, base_filename: str):
    """
    Crops regions from an image and saves them to the output folder.

    Args:
        image_path: Path to the source image on disk.
        regions: Parsed Region objects (bounding box + label attributes).
        output_folder: Directory that receives the cropped JPEGs.
        base_filename: Stem used to name crops ("<stem>_<n>.jpg", 1-based).

    Returns:
        A list of (cropped_image_path, text, int_type, languageInt) tuples;
        empty when the source image file does not exist.
    """
    cropped_data = []
    if not os.path.exists(image_path):
        print(f"Skipping {image_path}: Image file not found.")
        return cropped_data

    # First non-empty attribute wins, checked in this priority order;
    # the integer code is what ends up in the CSV "type" column.
    attr_priority = (
        ("medicine_name", 0),
        ("dosage", 1),
        ("dignostic", 2),
        ("symptoms", 3),
        ("personal_info", 4),
        ("numeric_data", 5),
        ("text", 6),
    )

    # Context manager releases the image file handle even if a crop raises
    # (the original opened the image and never closed it).
    with Image.open(image_path) as img:
        for idx, region in enumerate(regions):
            try:
                box = region.shape_attributes
                # Crop the region and force RGB so saving as JPEG always works.
                cropped_img = img.crop((box.x, box.y, box.x + box.width, box.y + box.height)).convert("RGB")

                cropped_image_name = f"{base_filename}_{idx + 1}.jpg"
                cropped_image_path = os.path.join(output_folder, cropped_image_name)
                cropped_img.save(cropped_image_path)

                # 0 = English, 1 = anything else (Urdu in this dataset).
                languageInt = 0 if region.region_attributes.language == "English" else 1

                int_type = 0
                text = ""
                for attr_name, code in attr_priority:
                    value = getattr(region.region_attributes, attr_name)
                    if value:
                        int_type = code
                        text = value
                        break

                # BUG FIX: the original called text.replace(...) and discarded
                # the result (str is immutable), so newlines/quotes/commas were
                # never actually stripped from the CSV text. Assign it back.
                text = text.replace("\n", "").replace("\"", "").replace(",", "`")

                cropped_data.append((cropped_image_path, text, int_type, languageInt))
            except Exception as e:
                # Best-effort: report the bad region and keep processing.
                print(f"Error cropping region {idx + 1} from {image_path}: {e}")

    return cropped_data
74
+
75
def process_folders_to_csv_and_crop(base_folder: str, output_csv: str, cropped_images_folder: str):
    """
    Walks every 'dr*' folder under base_folder, loads its VIA JSON
    annotations, crops all annotated regions out of the images, and
    consolidates everything into one CSV file.
    """
    # Make sure the destination folder for the crops exists.
    os.makedirs(cropped_images_folder, exist_ok=True)

    # The first row is the CSV header.
    csv_data = [["Cropped Image Path", "Text", "type", "language"]]

    for folder_name in os.listdir(base_folder):
        folder_path = os.path.join(base_folder, folder_name)
        # Only process directories named dr<...>; skip everything else.
        if not (folder_name.startswith("dr") and os.path.isdir(folder_path)):
            continue

        # The annotation file is expected to share the folder's name.
        json_path = os.path.join(folder_path, f"{folder_name}.json")
        if not os.path.exists(json_path):
            print(f"Skipping {folder_path}: No JSON file found.")
            continue

        with open(json_path, "r", encoding="utf-8") as file:
            annotated_data = AnnotatedData(json.load(file))

        for image_id, metadata in annotated_data.metadata.items():
            image_path = os.path.join(folder_path, metadata.filename)
            # Crops are named "<stem>_<n>.jpg", so drop the extension here.
            base_filename = os.path.splitext(metadata.filename)[0]
            rows = crop_and_save_regions(
                image_path, metadata.regions, cropped_images_folder, base_filename
            )
            csv_data.extend(rows)

    # Write header plus all collected rows in one go.
    with open(output_csv, "w", newline="", encoding="utf-8") as file:
        csv.writer(file).writerows(csv_data)

    print(f"CSV file created: {output_csv}")
116
+
117
def clean_second_column(overall_output_csv, output_file):
    """
    Post-processes the consolidated CSV: strips commas/quotes/newlines from
    the text column, normalizes the image-path column, drops rows whose text
    is empty, then deletes the original (unclean) CSV.
    """
    with open(overall_output_csv, mode='r', newline='', encoding='utf-8') as infile, \
         open(output_file, mode='w', newline='', encoding='utf-8') as outfile:
        writer = csv.writer(outfile)
        for row in csv.reader(infile):
            if len(row) <= 1:
                # Malformed row without a text column: silently dropped,
                # same as the original.
                continue
            # Sanitize the text column and normalize the path column.
            row[1] = row[1].replace(',', '').replace('"', '').replace('\n', ' ')
            row[0] = row[0].replace("./", "").replace("\\", "/")
            if row[1].strip():
                writer.writerow(row)
    # The raw CSV is superseded by the cleaned one; remove it.
    os.remove(overall_output_csv)
131
+
132
# Usage Example
# NOTE(review): these run at import time; wrap in `if __name__ == "__main__":`
# only if this module is ever imported elsewhere — left as-is to preserve
# the current script behavior.
base_folder = "./base_data"  # Base directory containing dr folders
overall_output_csv = "./all_cropped_data.csv"  # Single output CSV file
overall_output_csv_cleaned = "./all_cropped_data_cleaned.csv"  # Single output CSV file
cropped_images_folder = "./all_cropped_images"  # Folder to save all cropped images

# Build the raw CSV + crops, then sanitize into *_cleaned.csv
# (clean_second_column also deletes the raw CSV when it finishes).
process_folders_to_csv_and_crop(base_folder, overall_output_csv, cropped_images_folder)
clean_second_column(overall_output_csv,overall_output_csv_cleaned)
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: Doctor Handwriting Text Detection
3
- emoji: 🚀
4
- colorFrom: pink
5
- colorTo: green
6
  sdk: gradio
7
  sdk_version: 5.16.0
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Doctor_Handwriting_Text_Detection
3
+ app_file: predict.py
 
 
4
  sdk: gradio
5
  sdk_version: 5.16.0
 
 
6
  ---
 
 
RemoveNotCorrectImages..py ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
def delete_images_with_patterns(directory: str, patterns: list):
    """
    Deletes image files in the given directory if their filenames contain any of the specified patterns.

    Args:
        directory (str): The path to the directory containing images.
        patterns (list): Substrings to check; each is matched with a "dr"
            prefix (e.g. "1_1" matches any filename containing "dr1_1").
    """
    if not os.path.exists(directory):
        print(f"Directory '{directory}' does not exist.")
        return

    # Pre-build the actual substrings to look for.
    needles = ["dr" + pattern for pattern in patterns]

    for filename in os.listdir(directory):
        if not any(needle in filename for needle in needles):
            continue
        file_path = os.path.join(directory, filename)
        try:
            os.remove(file_path)
            print(f"Deleted: {file_path}")
        except Exception as e:
            # Keep going; report the file that could not be removed.
            print(f"Error deleting {file_path}: {e}")
25
+
26
def clean_csv(csv_path: str, patterns: list):
    """
    Removes rows from the CSV whose first column matches any pattern
    (exactly, or followed by "_<digits>"), then drops duplicate rows and
    writes the result back in place.

    Args:
        csv_path: Path to the CSV file to clean in place.
        patterns: Filename stems, with or without the "dr" prefix
            (e.g. "1_1" and "dr1_1" both match "dr1_1" / "dr1_1_2").
    """
    if not os.path.exists(csv_path):
        print(f"CSV file '{csv_path}' does not exist.")
        return

    # Load CSV into a DataFrame
    df = pd.read_csv(csv_path)

    # Ensure the first column is treated as a string
    df.iloc[:, 0] = df.iloc[:, 0].astype(str)

    # BUG FIX: some call sites pass patterns that already carry the "dr"
    # prefix; the original unconditionally prepended "dr", producing
    # "drdr..." regexes that could never match anything. Only add the
    # prefix when it is missing.
    modified_patterns = [p if p.startswith("dr") else f"dr{p}" for p in patterns]

    # Each pattern matches exactly, or with "_<digits>" appended; combine
    # them into one alternation.
    combined_regex = '|'.join(f"^{pattern}(_\\d+)?$" for pattern in modified_patterns)

    # NOTE(review): str.match anchors at the start and our regex ends with
    # "$", so only rows whose first column is exactly the bare stem are
    # removed — full paths like "all_cropped_images/dr1_1.jpg" do NOT match.
    # Confirm this is the column format of the CSVs passed in.
    df = df[~df.iloc[:, 0].str.match(combined_regex, na=False)]

    # Remove duplicate rows (first occurrence kept).
    df.drop_duplicates(inplace=True)

    # Save cleaned data back to CSV
    df.to_csv(csv_path, index=False)
    print(f"Updated CSV saved: {csv_path}")
62
+
63
+ # List of text patterns to match in filenames
64
+ patterns_to_delete = [
65
+ "1_1",
66
+ "4_1",
67
+ "4_2",
68
+ "4_3",
69
+ "4_4",
70
+ "4_5",
71
+ "4_6",
72
+ "5_1",
73
+ "5_2",
74
+ "7_1",
75
+ "10_1",
76
+ "24_1",
77
+ "24_2",
78
+ "25_1",
79
+ "25_2",
80
+ "29_1",
81
+ "30_1",
82
+ "33_1",
83
+ "36_1",
84
+ "36_4",
85
+ "36_5",
86
+ "36_6",
87
+ "38_1",
88
+ "38_2",
89
+ "38_3",
90
+ "38_4",
91
+ "38_5",
92
+ "38_6",
93
+ "38_7",
94
+ "38_8",
95
+ "38_9",
96
+ "42_1",
97
+ "42_2",
98
+ "42_4",
99
+ "43_1",
100
+ "43_2",
101
+ "43_3",
102
+ "43_4",
103
+ "43_5",
104
+ "44_1",
105
+ "44_2",
106
+ "44_3",
107
+ "44_4",
108
+ "44_6",
109
+ "45_1",
110
+ "47_1",
111
+ "50_1",
112
+ "57_1",
113
+ "57_2",
114
+ "63_1",
115
+ "64_1",
116
+ "64_2",
117
+ "64_3",
118
+ "64_4",
119
+ "64_5",
120
+ "64_6",
121
+ "64_7",
122
+ "64_8",
123
+ "64_9",
124
+ "65_1",
125
+ "65_2",
126
+ "66_1",
127
+ "66_2",
128
+ "66_3",
129
+ "66_4",
130
+ "66_5",
131
+ "66_6",
132
+ "66_7",
133
+ "66_8",
134
+ "69_1",
135
+ "69_2",
136
+ "69_3",
137
+ "69_4",
138
+ "69_5",
139
+ "69_6",
140
+ "69_7",
141
+ "69_8",
142
+ "69_9",
143
+ "71_1",
144
+ "71_2",
145
+ "71_3",
146
+ "71_4",
147
+ "71_5",
148
+ "73_1",
149
+ "74_1",
150
+ "75_1",
151
+ "75_2",
152
+ "75_3",
153
+ "75_4",
154
+ "75_5",
155
+ "75_6",
156
+ "77_1",
157
+ "77_2",
158
+ "77_3",
159
+ "76_1",
160
+ "76_2",
161
+ "76_3",
162
+ "76_4",
163
+ "76_5",
164
+ "80_1",
165
+ "80_2",
166
+ "82_1",
167
+ "86_1",
168
+ "86_2",
169
+ "86_3",
170
+ "86_4",
171
+ "86_5",
172
+ "87_1",
173
+ "87_2",
174
+ "87_3",
175
+ "87_4",
176
+ "87_5",
177
+ "87_6",
178
+ "89_1",
179
+ "92_1",
180
+ "92_2",
181
+ "93_1",
182
+ "94_2",
183
+ "94_1",
184
+ "95_1",
185
+ "97_1",
186
+ "97_2",
187
+ "102_1",
188
+ "104_1",
189
+ "108_1",
190
+ "109_1",
191
+ "112_1",
192
+ "114_1",
193
+ "114_2",
194
+ "114_3",
195
+ "114_4",
196
+ "114_5",
197
+ "114_6",
198
+ "114_7",
199
+ "114_8",
200
+ "114_9",
201
+ "115_1",
202
+ "115_2",
203
+ "116_1",
204
+ "116_2",
205
+ "116_3",
206
+ "117_1",
207
+ "128_1",
208
+ "130_1",
209
+ "132_1",
210
+ "132_2",
211
+ "132_3",
212
+ "137_1",
213
+ "137_2",
214
+ "137_3",
215
+ "137_4",
216
+ "137_5",
217
+ "137_6",
218
+ "137_7",
219
+ "137_8",
220
+ "137_9",
221
+ "140_5",
222
+ "146_1",
223
+ "146_2",
224
+ "146_3",
225
+ "151_1",
226
+ "151_2",
227
+ "163_1",
228
+ "169_1",
229
+ "173_1",
230
+ "173_2",
231
+ "100_1"
232
+ ]
233
+
234
+ # Specify your target directory
235
+ target_directory = "./cropped_images" # Change this to your actual directory
236
+
237
+ # Run the deletion function
238
+ # delete_images_with_patterns(target_directory, patterns_to_delete)
239
+ patterns_to_delete = [
240
+ "dr80_2",
241
+ "dr80_3",
242
+ "dr81_1",
243
+ "dr81_1",
244
+ "dr81_2",
245
+ "dr83_1",
246
+ "dr86_1",
247
+ "dr86_2",
248
+ "dr86_3",
249
+ "dr86_4",
250
+ "dr86_5",
251
+ "dr87_1",
252
+ "dr87_2",
253
+ "dr87_3",
254
+ "dr87_4",
255
+ "dr87_5",
256
+ "dr87_6",
257
+ "dr88_1",
258
+ "dr89_1",
259
+ "dr89_2",
260
+ "dr9_1",
261
+ "dr90_1",
262
+ "dr92_1",
263
+ "dr92_1",
264
+ "dr92_2",
265
+ "dr92_3",
266
+ "dr93_1",
267
+ "dr93_2",
268
+ "dr94_1",
269
+ "dr94_2",
270
+ "dr94_3",
271
+ "dr95_1",
272
+ "dr95_2",
273
+ "dr96_1",
274
+ "dr97_1",
275
+ "dr97_2",
276
+ "dr97_3",
277
+ "dr98_1",
278
+ ]
279
+ clean_csv("all_cropped_data.csv",patterns=patterns_to_delete)
__pycache__/AnnotatedData.cpython-312.pyc ADDED
Binary file (1.36 kB). View file
 
__pycache__/ImageMetadata.cpython-312.pyc ADDED
Binary file (1.61 kB). View file
 
__pycache__/Region.cpython-312.pyc ADDED
Binary file (1.96 kB). View file
 
__pycache__/RegionAttributes.cpython-312.pyc ADDED
Binary file (2.07 kB). View file
 
__pycache__/ShapeAttributes.cpython-312.pyc ADDED
Binary file (1.24 kB). View file
 
model/text_type_lang_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8a157f2b6cfbdf954a1fd867f7b4edb9fd92b1dbe33c4dbaa16fdd1e1bc671f
3
+ size 885270680
predict.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import tensorflow as tf
4
+ from keras.api.models import load_model
5
+ from PIL import Image
6
+ import string
7
+ import pandas as pd
8
+
9
class TextTypeLangModel:
    """Wraps the trained multi-output Keras model that predicts the text,
    its type code, and its language from a cropped prescription image."""

    def __init__(self, model_path, csv_path=None):
        # Load pre-trained model from disk.
        self.model = load_model(model_path)
        # Character inventory: Latin letters/digits/space plus the Urdu
        # alphabet. Must stay identical to the training-time inventory.
        urdu_chars = (
            "آبپچڈڑڤکگہہٹژزسٹطظعغفقکگلاںمںنۓہھوؤ"
            "ےیئؤٹپجچحخدڈذرزسشصضطظعغفقکلمنوٕں"
            "ۓۓہ۔،؛؟"
        )
        self.characters = string.ascii_letters + string.digits + " " + urdu_chars

        self.num_chars = len(self.characters) + 1  # Extra slot for blank
        # Index 0 is reserved for unknown/blank characters, hence the +1.
        self.char_to_index = {}
        self.index_to_char = {}
        for i, c in enumerate(self.characters):
            self.char_to_index[c] = i + 1
            self.index_to_char[i + 1] = c

    def encode_text(self, text, max_len=10):
        """Pad/trim text to max_len and map each char to its index (0 = unknown)."""
        padded = text[:max_len].ljust(max_len)
        return [self.char_to_index.get(c, 0) for c in padded]

    def preprocess_image(self, image):
        """Normalize a PIL image to a 128x128 float RGB array in [0, 1]."""
        rgb = image.convert("RGB").resize((128, 128))
        return np.array(rgb) / 255.0

    def predict(self, image):
        """Run the model on one PIL image; returns (text, type_idx, lang_idx)."""
        batch = np.expand_dims(self.preprocess_image(image), axis=0)
        pred_text, pred_type, pred_lang = self.model.predict(batch)

        # Greedy decode: per-time-step argmax over the 10-character head.
        decoded = ''.join(
            self.index_to_char.get(np.argmax(pred_text[0][i]), '') for i in range(10)
        )

        return decoded.strip(), np.argmax(pred_type), np.argmax(pred_lang)
42
+
43
def get_type_string(int_type):
    """Map a numeric type code (0-6) to its human-readable label; any
    unrecognized code yields "Unknown"."""
    labels = [
        "Medicine",
        "Dosage",
        "Diagnostic",
        "Symptoms",
        "Personal Info",
        "Numeric Data",
        "Text",
    ]
    # dict lookup (not list indexing) keeps the original semantics for
    # negative, float, and numpy-integer keys.
    type_switch = dict(enumerate(labels))
    return type_switch.get(int_type, "Unknown")
54
+
55
def predict_text_type_lang(image):
    """Gradio handler: predict (text, type label, language label) for one
    uploaded PIL image.

    PERF FIX: the original constructed TextTypeLangModel — and therefore
    reloaded the ~880 MB .h5 weights file — on every single request. Cache
    the model on the function object so the expensive load happens once.
    """
    model = getattr(predict_text_type_lang, "_model", None)
    if model is None:
        model = TextTypeLangModel("./model/text_type_lang_model.h5")
        predict_text_type_lang._model = model

    predicted_text, predicted_type, predicted_language = model.predict(image)
    predicted_type_str = get_type_string(predicted_type)
    # Language head is binary: 0 = English, 1 = Urdu.
    predicted_language_str = "English" if predicted_language == 0 else "Urdu"

    return predicted_text, predicted_type_str, predicted_language_str
62
+
63
# Gradio interface
# Single PIL image input -> three text outputs (recognized text, type label,
# language label), served by predict_text_type_lang above.
iface = gr.Interface(
    fn=predict_text_type_lang,
    inputs=gr.Image(type="pil"),
    outputs=["text", "text", "text"],
    title="Text Type & Language Prediction",
    description="Upload an image to predict the extracted text, type, and language.",
)

# debug=True surfaces tracebacks in the browser — acceptable for a demo Space.
iface.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ arabic-reshaper==3.0.0
2
+ asttokens==3.0.0
3
+ comm==0.2.2
4
+ debugpy==1.8.12
5
+ decorator==5.1.1
6
+ executing==2.1.0
7
+ ipykernel==6.29.5
8
+ ipython==8.31.0
9
+ jedi==0.19.2
10
+ jupyter_client==8.6.3
11
+ jupyter_core==5.7.2
12
+ Mako==1.3.6
13
+ MarkupSafe==3.0.2
14
+ matplotlib-inline==0.1.7
15
+ meson==1.6.0
16
+ nest-asyncio==1.6.0
17
+ parso==0.8.4
18
+ pillow==11.0.0
19
+ prompt_toolkit==3.0.48
20
+ pure_eval==0.2.3
21
+ pybind11==2.13.6
22
+ Pygments==2.19.1
23
+ python-bidi==0.6.3
24
+ pywin32==308
25
+ pyzmq==26.2.0
26
+ setuptools==75.6.0
27
+ stack-data==0.6.3
28
+ tornado==6.4.2
29
+ traitlets==5.14.3
30
+ wcwidth==0.2.13
31
+ accelerate==1.2.1
32
+ acres==0.2.0
33
+ aiofiles==24.1.0
34
+ aiohappyeyeballs==2.4.4
35
+ aiohttp==3.11.11
36
+ aiosignal==1.3.2
37
+ annotated-types==0.7.0
38
+ antlr4-python3-runtime==4.9.3
39
+ anyio==4.8.0
40
+ attrs==25.1.0
41
+ certifi==2024.12.14
42
+ charset-normalizer==3.4.1
43
+ ci-info==0.3.0
44
+ click==8.1.8
45
+ colorama==0.4.6
46
+ configobj==5.0.9
47
+ configparser==7.1.0
48
+ contourpy==1.3.1
49
+ cycler==0.12.1
50
+ datasets==3.2.0
51
+ diffusers==0.32.1
52
+ dill==0.3.8
53
+ docker-pycreds==0.4.0
54
+ efficientnet_pytorch==0.7.1
55
+ einops==0.8.0
56
+ etelemetry==0.3.1
57
+ evaluate==0.4.3
58
+ filelock==3.16.1
59
+ fonttools==4.55.3
60
+ frontend==0.0.3
61
+ frozenlist==1.5.0
62
+ fsspec==2024.9.0
63
+ gitdb==4.0.12
64
+ GitPython==3.1.44
65
+ h11==0.14.0
66
+ httplib2==0.22.0
67
+ huggingface-hub==0.27.1
68
+ idna==3.10
69
+ imageio==2.36.1
70
+ importlib_metadata==8.5.0
71
+ isodate==0.6.1
72
+ itsdangerous==2.2.0
73
+ Jinja2==3.1.5
74
+ jiwer==3.1.0
75
+ joblib==1.4.2
76
+ kiwisolver==1.4.8
77
+ lazy_loader==0.4
78
+ looseversion==1.3.0
79
+ lxml==5.3.0
80
+ matplotlib==3.10.0
81
+ mpmath==1.3.0
82
+ multidict==6.1.0
83
+ multiprocess==0.70.16
84
+ munch==4.0.0
85
+ natsort==8.4.0
86
+ networkx==3.4.2
87
+ nibabel==5.3.2
88
+ nipype==1.9.2
89
+ numpy==2.2.1
90
+ omegaconf==2.3.0
91
+ opencv-python==4.10.0.84
92
+ packaging==24.2
93
+ pandas==2.2.3
94
+ pathlib==1.0.1
95
+ pip==24.3.1
96
+ platformdirs==4.3.6
97
+ plotly==5.24.1
98
+ pretrainedmodels==0.7.4
99
+ propcache==0.2.1
100
+ protobuf==5.29.3
101
+ prov==2.0.1
102
+ psutil==6.1.1
103
+ puremagic==1.28
104
+ pyarrow==19.0.0
105
+ pydantic==2.10.5
106
+ pydantic_core==2.27.2
107
+ pydot==3.0.4
108
+ PyMuPDF==1.25.2
109
+ PyMySQL==1.1.1
110
+ pyparsing==3.2.1
111
+ python-dateutil==2.9.0.post0
112
+ pytz==2024.2
113
+ pyxnat==1.6.2
114
+ PyYAML==6.0.2
115
+ RapidFuzz==3.12.1
116
+ rdflib==6.3.2
117
+ regex==2024.11.6
118
+ requests==2.32.3
119
+ safetensors==0.5.2
120
+ scikit-image==0.25.0
121
+ scikit-learn==1.6.1
122
+ scipy==1.15.1
123
+ segmentation_models_pytorch==0.4.0
124
+ sentence-transformers==3.1.1
125
+ sentry-sdk==2.19.2
126
+ setproctitle==1.3.4
127
+ simplejson==3.19.3
128
+ six==1.17.0
129
+ smmap==5.0.2
130
+ sniffio==1.3.1
131
+ starlette==0.45.3
132
+ sympy==1.13.1
133
+ tenacity==9.0.0
134
+ threadpoolctl==3.5.0
135
+ tifffile==2025.1.10
136
+ timm==1.0.13
137
+ tokenizers==0.20.3
138
+ torch==2.5.1
139
+ torchvision==0.20.1
140
+ tqdm==4.67.1
141
+ traits==7.0.2
142
+ transformers==4.45.2
143
+ typing_extensions==4.12.2
144
+ tzdata==2025.1
145
+ unet==0.8.1
146
+ urllib3==2.3.0
147
+ uvicorn==0.34.0
148
+ wandb==0.19.2
149
+ xxhash==3.5.0
150
+ yarl==1.18.3
151
+ zipp==3.21.0
train_from_scratch.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import tensorflow as tf
4
+ from keras import Model, Input
5
+ from keras.api.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Reshape, TimeDistributed
6
+ from sklearn.model_selection import train_test_split
7
+ from keras.api.utils import to_categorical
8
+ from PIL import Image
9
+ import string
10
+
11
class TextTypeLangModel:
    """Trains a small CNN with three output heads on the cropped-regions CSV:
    a 10-character text head, a type head, and a language head.

    Expects the CSV to have columns "Cropped Image Path", "Text", "type"
    (0-6) and "language" (0/1), as produced by OutputMetadataCreator.py.
    """

    def __init__(self, csv_path):
        # Training table loaded eagerly; image files are read in prepare_data().
        self.df = pd.read_csv(csv_path)
        # Character inventory (Latin + digits + space + Urdu). Must match the
        # inventory used at inference time in predict.py.
        self.characters = string.ascii_letters + string.digits + " " + \
                          "آبپچڈڑڤکگہہٹژزسٹطظعغفقکگلاںمںنۓہھوؤ" + \
                          "ےیئؤٹپجچحخدڈذرزسشصضطظعغفقکلمنوٕں" + \
                          "ۓۓہ۔،؛؟"

        self.num_chars = len(self.characters) + 1  # Extra for blank
        # Index 0 is reserved for unknown/blank characters.
        self.char_to_index = {c: i+1 for i, c in enumerate(self.characters)}
        self.index_to_char = {i+1: c for i, c in enumerate(self.characters)}
        self.model = self.build_model()

    def encode_text(self, text, max_len=10):
        """Pad/trim text to max_len and map chars to indices (0 = unknown)."""
        text = text[:max_len].ljust(max_len) # Pad or trim text
        return [self.char_to_index.get(c, 0) for c in text] # Convert to indices

    def preprocess_image(self, image_path):
        """Load an image file as a normalized 128x128 RGB float array."""
        image = Image.open(image_path).convert("RGB")
        image = image.resize((128, 128))
        image = np.array(image) / 255.0 # Normalize
        return image

    def prepare_data(self):
        """Build (images, text, type, language) arrays and split 80/20.

        Returns the 8-tuple produced by train_test_split in the order
        (X_train, X_test, y_text_train, y_text_test,
         y_type_train, y_type_test, y_lang_train, y_lang_test).
        """
        # NOTE(review): all images are materialized in memory at once —
        # fine for a small dataset, would need a generator for a large one.
        X_images = np.array([self.preprocess_image(img) for img in self.df['Cropped Image Path']])
        y_text = np.array([self.encode_text(txt) for txt in self.df['Text']])
        y_type = to_categorical(self.df['type'].values, num_classes=7)
        y_lang = to_categorical(self.df['language'].values, num_classes=2)
        return train_test_split(X_images, to_categorical(y_text, self.num_chars), y_type, y_lang, test_size=0.2, random_state=42)

    def build_model(self):
        """Construct and compile the 3-head CNN (text / type / language)."""
        input_layer = Input(shape=(128, 128, 3))
        x = Conv2D(32, (3, 3), activation='relu')(input_layer)
        x = MaxPooling2D()(x)
        x = Conv2D(64, (3, 3), activation='relu')(x)
        x = MaxPooling2D()(x)
        x = Flatten()(x)

        # Increase the number of units in Dense to match the required total number of features
        x = Dense(1280, activation='relu')(x) # Set to 1280 units (10 * 128) to match reshape size

        # Reshape the output to have 10 time steps and each time step has 128 features
        x = Reshape((10, 128))(x) # Shape (batch_size, 10, 128)

        # TimeDistributed applied to Dense layer for text output (10-character sequence)
        time_distributed_text_output = TimeDistributed(Dense(self.num_chars, activation='softmax'))(x)
        text_output = Reshape((10, self.num_chars), name='text_output')(time_distributed_text_output) # 10-character output

        # Type and language prediction.
        # NOTE(review): these Dense layers are applied to the (10, 128)
        # tensor, so they emit a prediction per time step — which is why
        # train() repeats the one-hot labels 10x along the time axis below.
        output_type = Dense(7, activation='softmax', name='type_output')(x)
        output_lang = Dense(2, activation='softmax', name='lang_output')(x)

        model = Model(inputs=input_layer, outputs=[text_output, output_type, output_lang])
        model.compile(optimizer='adam',
                      loss=['categorical_crossentropy', 'categorical_crossentropy', 'categorical_crossentropy'],
                      metrics={'text_output': 'accuracy', 'type_output': 'accuracy', 'lang_output': 'accuracy'})
        return model

    def train(self, epochs=10, batch_size=32):
        """Fit the model on the CSV data and save weights to an .h5 file."""
        X_train, X_test, y_train_text, y_test_text, y_train_type, y_test_type, y_train_lang, y_test_lang = self.prepare_data()

        # Reshape the target labels to match the output of the model (batch_size, 10, num_classes)
        y_train_type = np.expand_dims(y_train_type, axis=1)
        y_train_type = np.repeat(y_train_type, 10, axis=1) # Repeat along the time axis

        y_test_type = np.expand_dims(y_test_type, axis=1)
        y_test_type = np.repeat(y_test_type, 10, axis=1)

        y_train_lang = np.expand_dims(y_train_lang, axis=1)
        y_train_lang = np.repeat(y_train_lang, 10, axis=1) # Repeat along the time axis

        y_test_lang = np.expand_dims(y_test_lang, axis=1)
        y_test_lang = np.repeat(y_test_lang, 10, axis=1)

        # Now, fit the model
        self.model.fit(X_train, [y_train_text, y_train_type, y_train_lang],
                       validation_data=(X_test, [y_test_text, y_test_type, y_test_lang]),
                       epochs=epochs, batch_size=batch_size)
        # Saved relative to the CWD; predict.py loads it from ./model/.
        self.model.save("text_type_lang_model.h5")

    def predict(self, image_path):
        """Predict (decoded_text, type_idx, lang_idx) for one image file."""
        image = self.preprocess_image(image_path)
        image = np.expand_dims(image, axis=0)
        pred_text, pred_type, pred_lang = self.model.predict(image)
        # Greedy decode: per-time-step argmax over the 10-character head.
        pred_text = ''.join(self.index_to_char.get(np.argmax(pred_text[0][i]), '') for i in range(10))
        return pred_text.strip(), np.argmax(pred_type), np.argmax(pred_lang)
97
+
98
+
99
# Initialize and train the model
# NOTE(review): runs at import time — training starts as soon as this
# module is imported or executed.
model = TextTypeLangModel("all_cropped_data_cleaned.csv")
model.train()

# Predict using the first image from the dataset (sanity check only —
# this image was part of the data fed to train/test split).
first_image_path = model.df['Cropped Image Path'].iloc[0]
predicted_text, predicted_type, predicted_language = model.predict(first_image_path)

print("Predicted Text:", predicted_text)
print("Predicted Type:", predicted_type)
# Language code 0 = English, 1 = Urdu (same convention as predict.py).
print("Predicted Language:", "English" if predicted_language == 0 else "Urdu")