File size: 2,124 Bytes
0334c3d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
import torch
import torch.nn as nn
from torchvision import models, transforms
from PIL import Image
import gradio as gr
# Labels the classifier can report. predict() looks a label up by the model's
# output index, so the order here presumably mirrors the label order used
# during training -- TODO confirm against the training script.
class_names = [
    "accordion", "banjo", "drum", "flute", "guitar",
    "harmonica", "saxophone", "sitar", "tabla", "violin",
]
# Image preprocessing applied before inference (the original author notes it
# is the same as during training): resize the shorter side to 256, take the
# central 224x224 crop, convert to a tensor, then normalize each channel with
# the commonly used ImageNet mean/std values.
transform = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)
# Model loading: ResNet-18 backbone with an enhanced fully connected head.
def load_model(model_path="music_model.pth", num_classes=None):
    """Build the ResNet-18 classifier and load its trained weights.

    The stock ``fc`` layer is replaced by a small MLP head
    (in_features -> 256 -> ``num_classes``) before the checkpoint is applied,
    so the checkpoint must have been saved from the same architecture.

    Args:
        model_path: Path to a checkpoint holding the model's ``state_dict``.
        num_classes: Size of the output layer. Defaults to
            ``len(class_names)`` so the head cannot drift out of sync with
            the label list.

    Returns:
        The model on CPU, switched to evaluation mode.

    Errors raised by ``torch.load`` (e.g. a missing file) or by
    ``load_state_dict`` (key/shape mismatch) propagate unchanged.
    """
    if num_classes is None:
        num_classes = len(class_names)

    # weights=None: skip the pretrained ImageNet weights, since every
    # parameter is overwritten by the checkpoint below.
    model = models.resnet18(weights=None)

    # Enhanced classifier head (512 -> 256 -> num_classes for ResNet-18).
    model.fc = nn.Sequential(
        nn.Linear(model.fc.in_features, 256),
        nn.ReLU(),
        nn.Dropout(0.4),
        nn.Linear(256, num_classes),
    )

    # NOTE(review): torch.load unpickles the file. Only load checkpoints from
    # a trusted source, or pass weights_only=True on PyTorch versions that
    # support it.
    state_dict = torch.load(model_path, map_location=torch.device('cpu'))
    model.load_state_dict(state_dict)

    # eval() disables dropout so inference is deterministic.
    model.eval()
    return model
# Load the classifier once at import time; predict() reuses this instance
# for every request.
model = load_model(model_path="music_model.pth")
# Prediction function used as the Gradio callback.
def predict(image):
    """Classify an uploaded instrument image.

    Args:
        image: Image as a NumPy array, as delivered by the
            ``gr.Image(type="numpy")`` input. Gradio passes ``None`` when the
            form is submitted without an image.

    Returns:
        A ``(label, confidences)`` tuple: the name of the top-scoring class
        and a dict mapping every class name to its softmax probability.

    Raises:
        gr.Error: If no image was supplied, so the UI shows a readable
            message instead of an internal traceback.
    """
    if image is None:
        raise gr.Error("Please upload an image before submitting.")

    pil_image = Image.fromarray(image).convert("RGB")
    # unsqueeze(0) adds the batch dimension the model expects.
    img_tensor = transform(pil_image).unsqueeze(0)

    with torch.no_grad():
        logits = model(img_tensor)[0]
        confidences = torch.nn.functional.softmax(logits, dim=0)

    prediction = class_names[logits.argmax().item()]
    # Pair labels with probabilities directly rather than indexing with a
    # hard-coded class count.
    confidences_dict = dict(zip(class_names, confidences.tolist()))
    return prediction, confidences_dict
# Gradio interface: one image input wired to predict(), whose two return
# values feed the two Label outputs (top class, per-class confidences).
interface = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="numpy", label="Upload Instrument Image"),
    outputs=[
        gr.Label(label="Predicted Instrument"),
        gr.Label(label="Confidence Scores"),
    ],
    # The leading emoji (musical note) is written as an escape so the title
    # survives tools that mis-decode non-ASCII source bytes.
    title="\U0001F3B5 Musical Instrument Classifier",
    description="Upload an image of a musical instrument and get the predicted class (accordion, guitar, etc.)",
)
def main():
    """Start the Gradio app defined by ``interface``."""
    interface.launch()


# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|