# selvaneyas's picture
# Upload 7 files
# e619b9a verified
# src/dataloader.py
import tensorflow as tf
import pandas as pd
import cv2
import numpy as np
import os
# Target image size as (height, width); it fixes the first two dimensions of
# every array returned by load_image.
IMG_SIZE = (128, 128)


def load_image(path):
    """Load an image file as a normalized single-channel float32 array.

    Args:
        path: Location of the image. May be ``bytes`` (which is how
            ``tf.numpy_function`` delivers string tensors), ``str`` or an
            ``os.PathLike`` object.

    Returns:
        A ``float32`` array of shape ``(*IMG_SIZE, 1)`` holding the grayscale
        pixel values divided by 255. If the file does not exist or OpenCV
        cannot decode it, an all-zero array of the same shape is returned
        instead of raising.
    """
    # Accept both the bytes coming from TensorFlow and plain paths passed by
    # direct callers; bytes.decode alone would fail on a str.
    if isinstance(path, bytes):
        path = path.decode("utf-8")
    else:
        path = os.fspath(path)

    height, width = IMG_SIZE
    blank = np.zeros((height, width, 1), dtype=np.float32)

    # Best-effort fallback: a missing file yields a blank image so that the
    # input pipeline keeps running.
    if not os.path.exists(path):
        return blank

    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals an unreadable/undecodable file by returning None.
    if img is None:
        return blank

    # cv2.resize expects the target size as (width, height).
    img = cv2.resize(img, (width, height))
    img = img / 255.0

    # VERY IMPORTANT: add channel dimension so the result is (H, W, 1).
    img = np.expand_dims(img, axis=-1)
    return img.astype(np.float32)
def parse_pair(img1_path, img2_path, label):
    """Turn one (path, path, label) record into model-ready tensors.

    Both paths are loaded through ``load_image`` wrapped in
    ``tf.numpy_function``, and the label is cast to ``tf.float32``.

    Returns:
        ``((img1, img2), label)`` where each image tensor has the static
        shape ``(*IMG_SIZE, 1)``.
    """
    static_shape = (*IMG_SIZE, 1)

    loaded = []
    for image_path in (img1_path, img2_path):
        image = tf.numpy_function(load_image, [image_path], tf.float32)
        # tf.numpy_function drops static shape information, so restore it
        # explicitly for downstream layers.
        image.set_shape(static_shape)
        loaded.append(image)

    first, second = loaded
    return (first, second), tf.cast(label, tf.float32)
def create_dataset(csv_file, batch_size=16, validation_split=0.2, seed=None):
    """Build training and validation datasets of image pairs from a CSV file.

    The CSV must contain the columns ``img1``, ``img2`` and ``label``. Rows
    are shuffled once, split into a training part and a validation part, and
    each part is turned into a batched ``tf.data.Dataset`` whose elements are
    produced by ``parse_pair``.

    Args:
        csv_file: CSV location, passed straight to ``pandas.read_csv``.
        batch_size: Number of pairs per batch.
        validation_split: Fraction of rows placed in the validation set;
            must lie in ``[0, 1]``.
        seed: Optional seed for the one-off shuffle. ``None`` (the default)
            keeps the shuffle non-deterministic; pass an int to get the same
            train/validation split on every run.

    Returns:
        A ``(train_dataset, validation_dataset)`` tuple.

    Raises:
        ValueError: If ``validation_split`` is outside ``[0, 1]``.
        KeyError: If the CSV lacks one of the required columns.
    """
    # Validate before touching the file system: an out-of-range fraction
    # would otherwise produce a negative or oversized split index and a
    # silently wrong split.
    if not 0.0 <= validation_split <= 1.0:
        raise ValueError(
            f"validation_split must be within [0, 1], got {validation_split!r}"
        )

    df = pd.read_csv(csv_file)

    required_columns = ("img1", "img2", "label")
    missing = [name for name in required_columns if name not in df.columns]
    if missing:
        raise KeyError(f"CSV file is missing required column(s): {missing}")

    # Shuffle once
    df = df.sample(frac=1, random_state=seed).reset_index(drop=True)

    split_idx = int(len(df) * (1 - validation_split))
    train_df = df.iloc[:split_idx]
    val_df = df.iloc[split_idx:]

    def make_ds(dataframe):
        # Slice the three columns in lockstep so each element is one record.
        ds = tf.data.Dataset.from_tensor_slices(
            (dataframe["img1"], dataframe["img2"], dataframe["label"])
        )
        ds = ds.map(parse_pair, num_parallel_calls=tf.data.AUTOTUNE)
        ds = ds.batch(batch_size)
        ds = ds.cache().prefetch(tf.data.AUTOTUNE)
        return ds

    return make_ds(train_df), make_ds(val_df)