| import os |
| import pandas as pd |
| import numpy as np |
| import tensorflow as tf |
| from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Concatenate, Input |
| from tensorflow.keras.models import Model |
| from tensorflow.keras.optimizers import Adam |
| from tensorflow.keras.preprocessing.image import load_img, img_to_array |
| import matplotlib.pyplot as plt |
|
|
| |
def load_images_and_texts(image_dir, text_data, img_size=(64, 64)):
    """Load the images and their text embeddings.

    Iterates over the rows of ``text_data``; for each row the file
    ``<image_dir>/<File_Name>.png`` is looked up. Rows whose file does not
    exist are skipped, so the two returned arrays always stay aligned.

    Args:
        image_dir: Directory that contains the ``.png`` files.
        text_data: DataFrame with ``File_Name`` and ``BERT_Embeddings`` columns.
        img_size: Target size handed to ``load_img`` as ``target_size``.

    Returns:
        A tuple ``(images, texts)`` of NumPy arrays: the loaded images with
        pixel values divided by 255.0, and the matching ``BERT_Embeddings``
        entries.
    """
    pixel_arrays = []
    embeddings = []
    for _, record in text_data.iterrows():
        png_path = os.path.join(image_dir, record['File_Name'] + '.png')
        if not os.path.exists(png_path):
            # No image on disk for this row: drop the row entirely.
            continue
        picture = load_img(png_path, target_size=img_size)
        pixel_arrays.append(img_to_array(picture) / 255.0)
        embeddings.append(record['BERT_Embeddings'])
    return np.array(pixel_arrays), np.array(embeddings)
|
|
| |
def build_cnn_model(image_shape, text_dim):
    """Build the CNN model that classifies from an image plus a text vector.

    The image branch is two ``Conv2D`` (3x3, ReLU, same padding) +
    ``MaxPooling2D`` (2x2) stages with 32 and 64 filters, then flattened.
    The text branch is a single 256-unit ReLU ``Dense`` layer. Both branches
    are concatenated, passed through a 128-unit ReLU ``Dense`` layer and
    finish in one sigmoid unit.

    Args:
        image_shape: Shape of a single image, used for the image ``Input``.
        text_dim: Length of a single text vector, used for the text ``Input``.

    Returns:
        An uncompiled ``Model`` named ``"CNN_Model"`` whose inputs are
        ``[image, text]`` (in that order).
    """
    text_input = Input(shape=(text_dim,))
    img_input = Input(shape=image_shape)

    # Image branch: identical conv/pool stage repeated with growing filters.
    features = img_input
    for n_filters in (32, 64):
        features = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(features)
        features = MaxPooling2D((2, 2))(features)
    image_features = Flatten()(features)

    # Text branch.
    text_features = Dense(256, activation='relu')(text_input)

    # Fusion head.
    merged = Concatenate()([image_features, text_features])
    hidden = Dense(128, activation='relu')(merged)
    output = Dense(1, activation='sigmoid')(hidden)

    return Model([img_input, text_input], output, name="CNN_Model")
|
|
| |
# Data locations.
pkl_path = '/content/drive/Othercomputers/Dizüstü Bilgisayarım/Desktop/word_embeddings_dataframe.pkl'
image_dir = '/content/drive/Othercomputers/Dizüstü Bilgisayarım/Desktop/human_annotated_images'

# Hyperparameters.
epochs, batch_size = 1000, 32
image_shape = (64, 64, 3)
# Presumably the length of each BERT_Embeddings vector -- confirm against the pickle.
text_dim = 768

# NOTE: unpickling can execute arbitrary code; only load files you trust.
data = pd.read_pickle(pkl_path)

# Images and embeddings, restricted to rows whose image file exists.
images, texts = load_images_and_texts(image_dir, data)

# Drop axis 1 of the stacked embeddings (raises unless that axis has length 1).
# Assumes each stored embedding is shaped (1, text_dim) -- TODO confirm.
texts = texts.squeeze(axis=1)

cnn_model = build_cnn_model(image_shape, text_dim)
cnn_model.compile(
    optimizer=Adam(learning_rate=0.0002, beta_1=0.5),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
|
|
| |
def train(epochs, batch_size):
    """Run ``epochs`` single-batch updates on the module-level ``cnn_model``.

    Each iteration draws ``batch_size`` random indices (with replacement) from
    the module-level ``images`` / ``texts`` arrays and performs one
    ``train_on_batch`` call. Loss and accuracy are printed every 10th
    iteration, and the model is saved to ``cnn_model_epoch_<n>.h5`` in the
    current working directory every 100th iteration.

    NOTE(review): every target is 1, so the model never sees a negative
    example -- confirm this is intended.

    Args:
        epochs: Number of iterations; each one is a single batch update.
        batch_size: Number of samples drawn per iteration.
    """
    for step in range(epochs):
        batch_idx = np.random.randint(0, images.shape[0], batch_size)
        batch_inputs = [images[batch_idx], texts[batch_idx]]
        targets = np.ones((batch_size, 1))

        loss, accuracy = cnn_model.train_on_batch(batch_inputs, targets)

        should_log = step % 10 == 0
        should_checkpoint = step % 100 == 0

        if should_log:
            print(f"Epoch {step}/{epochs} | Loss: {loss} | Accuracy: {accuracy}")

        if should_checkpoint:
            cnn_model.save(f'cnn_model_epoch_{step}.h5')
|
|
| |
# Start training with the module-level hyperparameters.
train(epochs=epochs, batch_size=batch_size)
|
|
| |
def generate_and_save_samples(cnn_model, num_samples=5):
    """Predict on randomly chosen samples and save each image with its score.

    Draws ``num_samples`` random indices (with replacement, so duplicates are
    possible) from the module-level ``images`` / ``texts`` arrays, runs
    ``cnn_model.predict`` on them and writes one ``sample_image_<i>.png`` per
    sample into the current working directory. Each saved image is titled
    with the corresponding prediction.

    Args:
        cnn_model: Model exposing ``predict([images, texts])``.
        num_samples: Number of samples to draw and save.
    """
    idx = np.random.randint(0, images.shape[0], num_samples)
    sample_images = images[idx]
    sample_texts = texts[idx]

    predictions = cnn_model.predict([sample_images, sample_texts])

    for i, img in enumerate(sample_images):
        # One figure per sample: drawing on pyplot's implicit current figure
        # would stack every image onto the same axes.
        fig, ax = plt.subplots()
        ax.imshow(img)
        ax.axis('off')
        ax.set_title(f"Prediction: {predictions[i]}")
        fig.savefig(f"sample_image_{i}.png")
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
|
|
| |
# Save a few example predictions from the trained model.
generate_and_save_samples(cnn_model=cnn_model)