Update app.py
Browse files
app.py
CHANGED
|
@@ -14,8 +14,9 @@ dataset = load_dataset('thefcraft/civitai-stable-diffusion-337k', split='train[:
|
|
| 14 |
|
| 15 |
# Preprocess text data
|
| 16 |
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
|
|
|
| 17 |
class CustomDataset(Dataset):
|
| 18 |
-
def
|
| 19 |
self.dataset = dataset
|
| 20 |
self.transform = transforms.Compose([
|
| 21 |
transforms.Resize((224, 224)),
|
|
@@ -23,48 +24,54 @@ class CustomDataset(Dataset):
|
|
| 23 |
])
|
| 24 |
self.label_encoder = LabelEncoder()
|
| 25 |
self.labels = self.label_encoder.fit_transform(dataset['Model'])
|
| 26 |
-
|
|
|
|
| 27 |
return len(self.dataset)
|
| 28 |
-
|
|
|
|
| 29 |
image = self.transform(self.dataset[idx]['image'])
|
| 30 |
text = tokenizer(self.dataset[idx]['prompt'], padding='max_length', truncation=True, return_tensors='pt')
|
| 31 |
label = self.labels[idx]
|
| 32 |
return image, text, label
|
| 33 |
-
|
| 34 |
# Define CNN for image processing
|
| 35 |
class ImageModel(nn.Module):
|
| 36 |
-
def
|
| 37 |
-
super(ImageModel, self).
|
| 38 |
self.model = models.resnet18(pretrained=True)
|
| 39 |
self.model.fc = nn.Linear(self.model.fc.in_features, 512)
|
|
|
|
| 40 |
def forward(self, x):
|
| 41 |
return self.model(x)
|
| 42 |
-
|
| 43 |
# Define MLP for text processing
|
| 44 |
class TextModel(nn.Module):
|
| 45 |
-
def
|
| 46 |
-
super(TextModel, self).
|
| 47 |
self.bert = BertModel.from_pretrained('bert-base-uncased')
|
| 48 |
self.fc = nn.Linear(768, 512)
|
|
|
|
| 49 |
def forward(self, x):
|
| 50 |
-
output = self.bert(x)
|
| 51 |
return self.fc(output.pooler_output)
|
| 52 |
-
|
| 53 |
# Combined model
|
| 54 |
class CombinedModel(nn.Module):
|
| 55 |
-
def
|
| 56 |
-
super(CombinedModel, self).
|
| 57 |
self.image_model = ImageModel()
|
| 58 |
self.text_model = TextModel()
|
| 59 |
self.fc = nn.Linear(1024, len(dataset['Model']))
|
|
|
|
| 60 |
def forward(self, image, text):
|
| 61 |
image_features = self.image_model(image)
|
| 62 |
text_features = self.text_model(text)
|
| 63 |
combined = torch.cat((image_features, text_features), dim=1)
|
| 64 |
return self.fc(combined)
|
| 65 |
-
|
| 66 |
# Instantiate model
|
| 67 |
model = CombinedModel()
|
|
|
|
| 68 |
# Define predict function
|
| 69 |
def predict(image):
|
| 70 |
model.eval()
|
|
@@ -72,16 +79,17 @@ def predict(image):
|
|
| 72 |
image = transforms.ToTensor()(image).unsqueeze(0)
|
| 73 |
image = transforms.Resize((224, 224))(image)
|
| 74 |
text_input = tokenizer("Sample prompt", return_tensors='pt', padding=True, truncation=True)
|
| 75 |
-
output = model(image,
|
| 76 |
-
, indices = torch.topk(output, 5)
|
| 77 |
recommended_models = [dataset['Model'][i] for i in indices[0]]
|
| 78 |
return recommended_models
|
| 79 |
-
|
| 80 |
# Set up Gradio interface
|
| 81 |
interface = gr.Interface(fn=predict,
|
| 82 |
inputs=gr.Image(type="pil"),
|
| 83 |
outputs=gr.Textbox(label="Recommended Models"),
|
| 84 |
title="AI Image Model Recommender",
|
| 85 |
description="Upload an AI-generated image to receive model recommendations.")
|
|
|
|
| 86 |
# Launch the app
|
| 87 |
interface.launch()
|
|
|
|
| 14 |
|
| 15 |
# Preprocess text data
|
| 16 |
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
| 17 |
+
|
| 18 |
class CustomDataset(Dataset):
|
| 19 |
+
def __init__(self, dataset):
|
| 20 |
self.dataset = dataset
|
| 21 |
self.transform = transforms.Compose([
|
| 22 |
transforms.Resize((224, 224)),
|
|
|
|
| 24 |
])
|
| 25 |
self.label_encoder = LabelEncoder()
|
| 26 |
self.labels = self.label_encoder.fit_transform(dataset['Model'])
|
| 27 |
+
|
| 28 |
+
def __len__(self):
|
| 29 |
return len(self.dataset)
|
| 30 |
+
|
| 31 |
+
def __getitem__(self, idx):
|
| 32 |
image = self.transform(self.dataset[idx]['image'])
|
| 33 |
text = tokenizer(self.dataset[idx]['prompt'], padding='max_length', truncation=True, return_tensors='pt')
|
| 34 |
label = self.labels[idx]
|
| 35 |
return image, text, label
|
| 36 |
+
|
| 37 |
# Define CNN for image processing
|
| 38 |
class ImageModel(nn.Module):
|
| 39 |
+
def __init__(self):
|
| 40 |
+
super(ImageModel, self).__init__()
|
| 41 |
self.model = models.resnet18(pretrained=True)
|
| 42 |
self.model.fc = nn.Linear(self.model.fc.in_features, 512)
|
| 43 |
+
|
| 44 |
def forward(self, x):
|
| 45 |
return self.model(x)
|
| 46 |
+
|
| 47 |
# Define MLP for text processing
|
| 48 |
class TextModel(nn.Module):
|
| 49 |
+
def __init__(self):
|
| 50 |
+
super(TextModel, self).__init__()
|
| 51 |
self.bert = BertModel.from_pretrained('bert-base-uncased')
|
| 52 |
self.fc = nn.Linear(768, 512)
|
| 53 |
+
|
| 54 |
def forward(self, x):
|
| 55 |
+
output = self.bert(**x)
|
| 56 |
return self.fc(output.pooler_output)
|
| 57 |
+
|
| 58 |
# Combined model
|
| 59 |
class CombinedModel(nn.Module):
|
| 60 |
+
def __init__(self):
|
| 61 |
+
super(CombinedModel, self).__init__()
|
| 62 |
self.image_model = ImageModel()
|
| 63 |
self.text_model = TextModel()
|
| 64 |
self.fc = nn.Linear(1024, len(dataset['Model']))
|
| 65 |
+
|
| 66 |
def forward(self, image, text):
|
| 67 |
image_features = self.image_model(image)
|
| 68 |
text_features = self.text_model(text)
|
| 69 |
combined = torch.cat((image_features, text_features), dim=1)
|
| 70 |
return self.fc(combined)
|
| 71 |
+
|
| 72 |
# Instantiate model
|
| 73 |
model = CombinedModel()
|
| 74 |
+
|
| 75 |
# Define predict function
|
| 76 |
def predict(image):
|
| 77 |
model.eval()
|
|
|
|
| 79 |
image = transforms.ToTensor()(image).unsqueeze(0)
|
| 80 |
image = transforms.Resize((224, 224))(image)
|
| 81 |
text_input = tokenizer("Sample prompt", return_tensors='pt', padding=True, truncation=True)
|
| 82 |
+
output = model(image, text_input)
|
| 83 |
+
_, indices = torch.topk(output, 5)
|
| 84 |
recommended_models = [dataset['Model'][i] for i in indices[0]]
|
| 85 |
return recommended_models
|
| 86 |
+
|
| 87 |
# Set up Gradio interface
|
| 88 |
interface = gr.Interface(fn=predict,
|
| 89 |
inputs=gr.Image(type="pil"),
|
| 90 |
outputs=gr.Textbox(label="Recommended Models"),
|
| 91 |
title="AI Image Model Recommender",
|
| 92 |
description="Upload an AI-generated image to receive model recommendations.")
|
| 93 |
+
|
| 94 |
# Launch the app
|
| 95 |
interface.launch()
|