Spaces:

bgaspra
/

CNN_MLP

Build error

App Files Files Community

bgaspra committed on Nov 13, 2024

Commit

906c9b0

verified ·

1 Parent(s): 0365b37

Update app.py

Browse files

Files changed (1) hide show

app.py +163 -128

app.py CHANGED Viewed

@@ -1,143 +1,178 @@
-import gradio as gr
-import torch
-import torch.nn as nn
-import torchvision.transforms as transforms
-from torchvision import models
-from transformers import BertTokenizer, BertModel
-import pandas as pd
-from datasets import load_dataset
-from torch.utils.data import DataLoader, Dataset
-from sklearn.preprocessing import LabelEncoder
 import requests
-from PIL import Image
-from io import BytesIO
 import numpy as np
-# Load dataset
-dataset = load_dataset('thefcraft/civitai-stable-diffusion-337k', split='train[:10000]')
-# Download and cache images
-def download_image(url):
-    try:
-        response = requests.get(url)
-        img = Image.open(BytesIO(response.content))
-        return img
-    except:
-        return None
-# Create image cache
-image_cache = {}
-for idx, item in enumerate(dataset):
-    if idx % 100 == 0:  # Status update
-        print(f"Downloaded {idx} images")
-    url = item['url']
-    if url not in image_cache:
-        img = download_image(url)
-        if img is not None:
-            image_cache[url] = img
-# Preprocess text data
-tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-class CustomDataset(Dataset):
-    def __init__(self, dataset, image_cache):
-        self.dataset = dataset
-        self.image_cache = image_cache
-        self.transform = transforms.Compose([
-            transforms.Resize((224, 224)),
-            transforms.ToTensor(),
-        ])
-        self.label_encoder = LabelEncoder()
-        self.labels = self.label_encoder.fit_transform(dataset['Model'])
-    def __len__(self):
-        return len(self.dataset)
-    def __getitem__(self, idx):
-        url = self.dataset[idx]['url']
-        image = self.transform(self.image_cache[url])
-        text = tokenizer(self.dataset[idx]['prompt'],
-                        padding='max_length',
-                        truncation=True,
-                        return_tensors='pt')
-        label = self.labels[idx]
-        return image, text, label
-# Model definitions remain the same
-class ImageModel(nn.Module):
-    def __init__(self):
-        super(ImageModel, self).__init__()
-        self.model = models.resnet18(pretrained=True)
-        self.model.fc = nn.Linear(self.model.fc.in_features, 512)
-    def forward(self, x):
-        return self.model(x)
-class TextModel(nn.Module):
-    def __init__(self):
-        super(TextModel, self).__init__()
-        self.bert = BertModel.from_pretrained('bert-base-uncased')
-        self.fc = nn.Linear(768, 512)
-    def forward(self, x):
-        output = self.bert(**x)
-        return self.fc(output.pooler_output)
-class CombinedModel(nn.Module):
-    def __init__(self):
-        super(CombinedModel, self).__init__()
-        self.image_model = ImageModel()
-        self.text_model = TextModel()
-        self.fc = nn.Linear(1024, len(dataset['Model']))
-    def forward(self, image, text):
-        image_features = self.image_model(image)
-        text_features = self.text_model(text)
-        combined = torch.cat((image_features, text_features), dim=1)
-        return self.fc(combined)
-# Instantiate model
-model = CombinedModel()
-# Modified prediction function
-def get_recommendations(input_image):
-    model.eval()
-    with torch.no_grad():
-        # Process input image
-        transform = transforms.Compose([
-            transforms.Resize((224, 224)),
-            transforms.ToTensor()
-        ])
-        input_tensor = transform(input_image).unsqueeze(0)
-        # Get dummy text input (since we're focusing on image similarity)
-        text_input = tokenizer("", return_tensors='pt', padding=True, truncation=True)
-        # Get model output
-        output = model(input_tensor, text_input)
-        scores, indices = torch.topk(output, 5)
-        # Prepare gallery output
-        gallery_images = []
-        for idx in indices[0]:
-            url = dataset[idx]['url']
-            model_name = dataset[idx]['Model']
-            score = scores[0][idx].item()
-            # Get image from cache
-            if url in image_cache:
-                gallery_images.append((image_cache[url], f"{model_name}\nScore: {score:.2f}"))
-        return gallery_images
-# Set up Gradio interface
 interface = gr.Interface(
     fn=get_recommendations,
-    inputs=gr.Image(type="pil"),
     outputs=gr.Gallery(label="Recommended Images"),
-    title="Image Recommendation System",
-    description="Upload an image and get similar images with their model names and distances."
 )
-# Launch the app
-interface.launch()

+import os
 import requests
+from tqdm import tqdm
+from datasets import load_dataset
 import numpy as np
+from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
+from tensorflow.keras.preprocessing import image
+from sklearn.neighbors import NearestNeighbors
+import joblib
+from PIL import UnidentifiedImageError, Image
+import gradio as gr
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense, Dropout
+from tensorflow.keras.preprocessing.text import Tokenizer
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+# Load the dataset; only its 'train' split is used below.
+dataset = load_dataset("thefcraft/civitai-stable-diffusion-337k")
+# Keep rows that are not flagged NSFW and whose 'Model' field is a non-empty,
+# non-whitespace string ('Model' becomes the caption of each recommendation).
+dataset_filtered = dataset['train'].filter(
+    lambda x: not x['nsfw'] and x['Model'] is not None and x['Model'].strip() != ''
+)
+# Shuffle with a fixed seed, then keep the first `subset_size` rows.
+# NOTE(review): presumably fails if fewer than `subset_size` rows survive the
+# filter — confirm against the dataset size.
+subset_size = 2700
+dataset_subset = dataset_filtered.shuffle(seed=42).select(range(subset_size))
+# Directory where downloaded images are written (created if missing).
+image_dir = 'civitai_images'
+os.makedirs(image_dir, exist_ok=True)
+# ImageNet-pretrained ResNet50 used as a feature extractor: the classification
+# head is dropped (include_top=False) and average pooling is applied.
+cnn_model = ResNet50(weights='imagenet', include_top=False, pooling='avg')
+# Text processing setup
+max_words = 10000  # Maximum number of words to keep
+max_len = 100      # Maximum length of each text sequence
+# Build the tokenizer vocabulary from the prompts of the selected subset.
+# NOTE(review): assumes every 'prompt' is a string — confirm there are no nulls.
+tokenizer = Tokenizer(num_words=max_words)
+prompts = [sample['prompt'] for sample in dataset_subset]
+tokenizer.fit_on_texts(prompts)
+# Create MLP model for text processing
+def create_mlp_model(input_dim):
+    model = Sequential([
+        Dense(256, activation='relu', input_dim=input_dim),
+        Dropout(0.3),
+        Dense(128, activation='relu'),
+        Dropout(0.2),
+        Dense(64, activation='relu'),
+        Dense(32, activation='relu')
+    ])
+    return model
+# Function to extract text features
+def extract_text_features(prompt):
+    # Convert text to sequence and pad
+    sequence = tokenizer.texts_to_sequences([prompt])
+    padded = pad_sequences(sequence, maxlen=max_len)
+    # Get features from MLP
+    return mlp_model.predict(padded)
+# Function to extract image features
+def extract_image_features(img_path, model):
+    img = image.load_img(img_path, target_size=(224, 224))
+    img_array = image.img_to_array(img)
+    img_array = np.expand_dims(img_array, axis=0)
+    img_array = preprocess_input(img_array)
+    features = model.predict(img_array)
+    return features.flatten()
+# Prepare text data
+text_sequences = tokenizer.texts_to_sequences(prompts)
+padded_sequences = pad_sequences(text_sequences, maxlen=max_len)
+# Create and train MLP model
+mlp_model = create_mlp_model(max_len)
+mlp_model.compile(optimizer='adam', loss='mse')
+mlp_model.fit(padded_sequences, padded_sequences, epochs=5, batch_size=32, validation_split=0.2)
+# Extract features for both images and text
+image_features = []
+text_features = []
+image_paths = []
+model_names = []
+for sample in tqdm(dataset_subset):
+    img_url = sample['url']
+    model_name = sample['Model']
+    prompt = sample['prompt']
+    img_path = os.path.join(image_dir, os.path.basename(img_url))
+    try:
+        # Download and process image
+        response = requests.get(img_url)
+        response.raise_for_status()
+        if 'image' not in response.headers['Content-Type']:
+            raise ValueError("URL does not contain an image")
+        with open(img_path, 'wb') as f:
+            f.write(response.content)
+        # Extract image features
+        img_features = extract_image_features(img_path, cnn_model)
+        # Extract text features
+        txt_features = extract_text_features(prompt)
+        # Store features and metadata
+        image_features.append(img_features)
+        text_features.append(txt_features.flatten())
+        image_paths.append(img_path)
+        model_names.append(model_name)
+    except (UnidentifiedImageError, requests.exceptions.RequestException) as e:
+        print(f"Error processing {img_url}: {e}")
+        if os.path.exists(img_path):
+            os.remove(img_path)
+# Stack the per-sample vectors collected above into arrays
+# (one row per sample that was processed without error).
+image_features = np.array(image_features)
+text_features = np.array(text_features)
+# Join each image vector with its text vector along the feature axis so that
+# every row describes one sample in a single combined space.
+combined_features = np.concatenate([image_features, text_features], axis=1)
+# Fit the nearest-neighbour index on the combined vectors; queries return
+# 5 neighbours unless told otherwise.
+nbrs = NearestNeighbors(n_neighbors=5, algorithm='ball_tree').fit(combined_features)
+# Persist the fitted objects and the arrays that map result indices back to
+# image paths and model names.
+# NOTE(review): mlp_model is a Keras model pickled through joblib here;
+# whether that round-trips depends on the installed version — confirm.
+joblib.dump(nbrs, 'nearest_neighbors_model.pkl')
+joblib.dump(mlp_model, 'mlp_model.pkl')
+joblib.dump(tokenizer, 'tokenizer.pkl')
+np.save('combined_features.npy', combined_features)
+np.save('image_paths.npy', image_paths)
+np.save('model_names.npy', model_names)
+# Function to get recommendations
+def get_recommendations(img, prompt="", n_neighbors=5):
+    # Process input image
+    img_path = "temp_input_image.png"
+    img.save(img_path)
+    img_features = extract_image_features(img_path, cnn_model)
+    # Process input text
+    txt_features = extract_text_features(prompt)
+    # Combine features
+    input_features = np.concatenate([img_features, txt_features.flatten()])
+    # Get recommendations
+    distances, indices = nbrs.kneighbors([input_features])
+    recommended_images = [image_paths[idx] for idx in indices.flatten()]
+    recommended_model_names = [model_names[idx] for idx in indices.flatten()]
+    recommended_distances = distances.flatten()
+    return [(Image.open(img_path), f'{name}, Distance: {dist:.2f}')
+            for img_path, name, dist in zip(recommended_images, recommended_model_names, recommended_distances)]
+# Gradio interface: an image upload and a prompt textbox are passed to
+# get_recommendations, and its (image, caption) results fill a gallery.
+# Only two inputs are wired, so n_neighbors keeps its default value.
 interface = gr.Interface(
     fn=get_recommendations,
+    inputs=[
+        gr.Image(type="pil"),
+        gr.Textbox(label="Prompt")
+    ],
     outputs=gr.Gallery(label="Recommended Images"),
+    title="Image and Text Recommendation System",
+    description="Upload an image and/or enter a prompt to get similar images with their model names and distances."
 )
+# Start the web app only when this file is run as a script.
+if __name__ == "__main__":
+    interface.launch()