File size: 2,124 Bytes
0334c3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import torch
import torch.nn as nn
from torchvision import models, transforms
from PIL import Image
import gradio as gr

# 📦 Class names.
# Position matters: index i of the model output is reported as class_names[i].
class_names = [
    "accordion", "banjo", "drum", "flute", "guitar",
    "harmonica", "saxophone", "sitar", "tabla", "violin",
]

# 📐 Preprocessing pipeline (the original author notes it must match training):
# resize to 256, center-crop to 224, convert to a tensor, then normalize each
# channel with the mean/std values below.
transform = transforms.Compose(
    [
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# 🧠 Load model with enhanced FC head
def load_model(model_path="music_model.pth"):
    model = models.resnet18(weights=None)

    # ✨ Enhanced classifier head (512 β†’ 256 β†’ 10)
    model.fc = nn.Sequential(
        nn.Linear(model.fc.in_features, 256),
        nn.ReLU(),
        nn.Dropout(0.4),
        nn.Linear(256, 10)
    )

    model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
    model.eval()
    return model

# Load the classifier once at import time so every request reuses it.
model = load_model(model_path="music_model.pth")

# 🔍 Prediction function
def predict(image):
    """Classify an uploaded instrument image.

    Args:
        image: The image as a NumPy array, as delivered by the Gradio image
            input (``type="numpy"``). ``None`` when nothing was uploaded.

    Returns:
        A ``(prediction, confidences)`` tuple: the name of the highest-scoring
        class, and a dict mapping every class name to its softmax probability.

    Raises:
        gr.Error: If no image was provided.
    """
    # Without this guard Image.fromarray(None) fails with an opaque error.
    if image is None:
        raise gr.Error("Please upload an image first.")

    rgb_image = Image.fromarray(image).convert("RGB")
    batch = transform(rgb_image).unsqueeze(0)  # add the batch dimension

    with torch.no_grad():
        logits = model(batch)[0]
        probabilities = torch.nn.functional.softmax(logits, dim=0)

    # Take the argmax on the raw logits, as before, so the winner is unchanged.
    prediction = class_names[logits.argmax().item()]
    # Pair labels with probabilities instead of assuming exactly 10 classes.
    confidences_dict = dict(zip(class_names, probabilities.tolist()))
    return prediction, confidences_dict

# 🎛️ Gradio Interface
# Wires predict() to one image-upload input and two label outputs:
# the top predicted class and the per-class confidence scores.
interface = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="numpy", label="Upload Instrument Image"),
    outputs=[
        gr.Label(label="Predicted Instrument"),
        gr.Label(label="Confidence Scores"),
    ],
    title="🎵 Musical Instrument Classifier",
    description="Upload an image of a musical instrument and get the predicted class (accordion, guitar, etc.)",
)

# 🚀 Launch the app only when run as a script, not when imported.
if __name__ == "__main__":
    interface.launch()