# Doctor_Handwriting_Text_Detection / train_from_scratch.py
# Uploaded by Umer2762 via huggingface_hub (commit 5626a1a, verified).
import pandas as pd
import numpy as np
import tensorflow as tf
from keras import Model, Input
from keras.api.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Reshape, TimeDistributed
from sklearn.model_selection import train_test_split
from keras.api.utils import to_categorical
from PIL import Image
import string
class TextTypeLangModel:
    """Multi-task CNN trained from a CSV of cropped handwriting images.

    From a 128x128 RGB crop the model predicts:
      * a fixed-length 10-character transcription (per-timestep softmax),
      * a document/text type (7 classes),
      * a language (2 classes: 0 = English, 1 = Urdu per the caller's usage).
    """

    def __init__(self, csv_path):
        """Load the dataset CSV, build the character maps and the model.

        Args:
            csv_path: CSV with columns 'Cropped Image Path', 'Text',
                'type' (int in [0, 7)) and 'language' (int in [0, 2)).
                # assumed from how prepare_data() consumes the frame — TODO confirm
        """
        self.df = pd.read_csv(csv_path)
        # The raw alphabet literal contains repeated Urdu characters.
        # Duplicates break the round trip: char_to_index keeps the LAST
        # duplicate's index while index_to_char keeps an entry for every
        # duplicate, and num_chars is inflated. Deduplicate while
        # preserving first-seen order (dict preserves insertion order).
        raw_alphabet = string.ascii_letters + string.digits + " " + \
            "آبپچڈڑڤکگہہٹژزسٹطظعغفقکگلاںمںنۓہھوؤ" + \
            "ےیئؤٹپجچحخدڈذرزسشصضطظعغفقکلمنوٕں" + \
            "ۓۓہ۔،؛؟"
        self.characters = "".join(dict.fromkeys(raw_alphabet))
        self.num_chars = len(self.characters) + 1  # index 0 reserved for blank/unknown
        self.char_to_index = {c: i + 1 for i, c in enumerate(self.characters)}
        self.index_to_char = {i + 1: c for i, c in enumerate(self.characters)}
        self.model = self.build_model()

    def encode_text(self, text, max_len=10):
        """Encode *text* as a list of ``max_len`` character indices.

        Text is trimmed to ``max_len`` and right-padded with spaces;
        characters outside the alphabet map to 0 (the blank index).
        """
        text = text[:max_len].ljust(max_len)
        return [self.char_to_index.get(c, 0) for c in text]

    def preprocess_image(self, image_path):
        """Load an image, resize to 128x128 RGB and scale to [0, 1]."""
        image = Image.open(image_path).convert("RGB")
        image = image.resize((128, 128))
        return np.array(image) / 255.0

    def prepare_data(self):
        """Build model-ready arrays and return an 80/20 train/test split.

        Returns the 8-tuple produced by ``train_test_split``:
        (X_train, X_test, y_text_train, y_text_test,
         y_type_train, y_type_test, y_lang_train, y_lang_test).
        """
        X_images = np.array([self.preprocess_image(img) for img in self.df['Cropped Image Path']])
        y_text = np.array([self.encode_text(txt) for txt in self.df['Text']])
        y_type = to_categorical(self.df['type'].values, num_classes=7)
        y_lang = to_categorical(self.df['language'].values, num_classes=2)
        # y_text becomes one-hot of shape (n, 10, num_chars) to match text_output.
        return train_test_split(X_images, to_categorical(y_text, self.num_chars),
                                y_type, y_lang, test_size=0.2, random_state=42)

    def build_model(self):
        """Build and compile the three-headed CNN.

        The type/language heads branch off the shared dense trunk, NOT the
        (10, 128) reshaped sequence: the original applied Dense(7)/Dense(2)
        after the Reshape, producing per-timestep class predictions and
        forcing the labels to be repeated 10x in train().
        """
        inputs = Input(shape=(128, 128, 3))
        x = Conv2D(32, (3, 3), activation='relu')(inputs)
        x = MaxPooling2D()(x)
        x = Conv2D(64, (3, 3), activation='relu')(x)
        x = MaxPooling2D()(x)
        features = Flatten()(x)
        # 10 time steps * 128 features per step.
        trunk = Dense(10 * 128, activation='relu')(features)
        seq = Reshape((10, 128))(trunk)  # (batch, 10, 128)
        # Per-timestep character distribution -> (batch, 10, num_chars).
        text_output = TimeDistributed(Dense(self.num_chars, activation='softmax'),
                                      name='text_output')(seq)
        # One distribution per image for type and language.
        type_output = Dense(7, activation='softmax', name='type_output')(trunk)
        lang_output = Dense(2, activation='softmax', name='lang_output')(trunk)
        model = Model(inputs=inputs, outputs=[text_output, type_output, lang_output])
        model.compile(optimizer='adam',
                      loss=['categorical_crossentropy', 'categorical_crossentropy', 'categorical_crossentropy'],
                      metrics={'text_output': 'accuracy', 'type_output': 'accuracy', 'lang_output': 'accuracy'})
        return model

    def train(self, epochs=10, batch_size=32):
        """Fit the model on the CSV data and save it to disk."""
        (X_train, X_test,
         y_train_text, y_test_text,
         y_train_type, y_test_type,
         y_train_lang, y_test_lang) = self.prepare_data()
        # Type/language heads now emit a single distribution per image, so
        # the one-hot labels are used as-is (no per-timestep repetition).
        self.model.fit(X_train, [y_train_text, y_train_type, y_train_lang],
                       validation_data=(X_test, [y_test_text, y_test_type, y_test_lang]),
                       epochs=epochs, batch_size=batch_size)
        self.model.save("text_type_lang_model.h5")

    def predict(self, image_path):
        """Predict (text, type_id, language_id) for a single image.

        Returns the decoded 10-character string (stripped), the argmax
        type class and the argmax language class.
        """
        image = self.preprocess_image(image_path)
        image = np.expand_dims(image, axis=0)
        pred_text, pred_type, pred_lang = self.model.predict(image)
        # Decode each of the 10 timesteps; index 0 (blank) decodes to ''.
        decoded = ''.join(self.index_to_char.get(int(np.argmax(step)), '')
                          for step in pred_text[0])
        # argmax over the class axis only — the original flattened the whole
        # prediction tensor, yielding out-of-range "class" indices.
        return decoded.strip(), int(np.argmax(pred_type[0])), int(np.argmax(pred_lang[0]))
def main():
    """Train the multi-task model, then sanity-check it on the first image."""
    model = TextTypeLangModel("all_cropped_data_cleaned.csv")
    model.train()
    # Predict using the first image from the dataset.
    first_image_path = model.df['Cropped Image Path'].iloc[0]
    predicted_text, predicted_type, predicted_language = model.predict(first_image_path)
    print("Predicted Text:", predicted_text)
    print("Predicted Type:", predicted_type)
    print("Predicted Language:", "English" if predicted_language == 0 else "Urdu")


# Guard the entry point so importing this module does not kick off training.
if __name__ == "__main__":
    main()