ma4389 committed on
Commit
0334c3d
Β·
verified Β·
1 Parent(s): 95200ae

Upload 5 files

Browse files
Files changed (5) hide show
  1. app (2).py +67 -0
  2. app.py +74 -0
  3. music_model.pth +3 -0
  4. music_model2.pth +3 -0
  5. requirements.txt +4 -0
app (2).py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from torchvision import models, transforms
4
+ from PIL import Image
5
+ import gradio as gr
6
+ import os
7
+
8
# πŸ“¦ Instrument labels, in the index order expected by the model's output head.
class_names = [
    "accordion", "banjo", "drum", "flute", "guitar",
    "harmonica", "saxophone", "sitar", "tabla", "violin",
]
21
+
22
+ # πŸ“ Transformations (same as during training)
23
+ transform = transforms.Compose([
24
+ transforms.Resize(256),
25
+ transforms.CenterCrop(224),
26
+ transforms.ToTensor(),
27
+ transforms.Normalize([0.485, 0.456, 0.406],
28
+ [0.229, 0.224, 0.225])
29
+ ])
30
+
31
# 🧠 Load model
def load_model(model_path="music_model.pth"):
    """Build a ResNet-18 classifier and load trained weights from disk.

    Args:
        model_path: Path to a state_dict checkpoint saved with ``torch.save``.

    Returns:
        The model in eval mode, ready for CPU inference.
    """
    model = models.resnet18(weights=None)
    # Replace the ImageNet head with one output per instrument class.
    model.fc = nn.Linear(model.fc.in_features, len(class_names))
    # weights_only=True restricts unpickling to plain tensors/containers,
    # avoiding arbitrary-code execution from untrusted checkpoints; it is
    # also the default behaviour from torch 2.6 onward.
    state_dict = torch.load(
        model_path, map_location=torch.device("cpu"), weights_only=True
    )
    model.load_state_dict(state_dict)
    model.eval()
    return model
38
+
# Load the trained checkpoint once at module level so every request reuses it.
model = load_model("music_model.pth")
40
+
41
+ # πŸ” Prediction function
42
+ def predict(image):
43
+ image = Image.fromarray(image).convert("RGB")
44
+ img_tensor = transform(image).unsqueeze(0)
45
+ with torch.no_grad():
46
+ outputs = model(img_tensor)
47
+ _, predicted = torch.max(outputs, 1)
48
+ prediction = class_names[predicted.item()]
49
+ confidences = torch.nn.functional.softmax(outputs[0], dim=0)
50
+ confidences_dict = {class_names[i]: float(confidences[i]) for i in range(len(class_names))}
51
+ return prediction, confidences_dict
52
+
53
# πŸŽ›οΈ Gradio UI wiring: one image input, two label outputs.
interface = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="numpy", label="Upload Instrument Image"),
    outputs=[
        gr.Label(label="Predicted Instrument"),
        gr.Label(label="Confidence Scores"),
    ],
    title="🎡 Musical Instrument Classifier",
    description=(
        "Upload an image of a musical instrument and get the predicted "
        "class (accordion, guitar, etc.)"
    ),
)
64
+
65
# πŸš€ Launch the app
# Start the Gradio server only when executed as a script, not on import.
if __name__ == "__main__":
    interface.launch()
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from torchvision import models, transforms
4
+ from PIL import Image
5
+ import gradio as gr
6
+
7
# πŸ“¦ Instrument labels, in the index order expected by the model's output head.
class_names = [
    "accordion", "banjo", "drum", "flute", "guitar",
    "harmonica", "saxophone", "sitar", "tabla", "violin",
]
20
+
21
# πŸ“ Preprocessing pipeline — must match the transforms used during training.
transform = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        # ImageNet channel statistics.
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)
29
+
30
# 🧠 Load model with enhanced FC head
def load_model(model_path="music_model.pth"):
    """Build a ResNet-18 with a two-layer classifier head and load weights.

    The head (512 -> 256 -> num classes) must match the architecture used
    at training time, or ``load_state_dict`` will raise.

    Args:
        model_path: Path to a state_dict checkpoint saved with ``torch.save``.

    Returns:
        The model in eval mode, ready for CPU inference.
    """
    model = models.resnet18(weights=None)

    # ✨ Enhanced classifier head (512 β†’ 256 β†’ len(class_names)).
    # The output width is derived from class_names instead of a hard-coded
    # 10, matching how the rest of the app indexes predictions.
    model.fc = nn.Sequential(
        nn.Linear(model.fc.in_features, 256),
        nn.ReLU(),
        nn.Dropout(0.4),
        nn.Linear(256, len(class_names)),
    )

    # weights_only=True restricts unpickling to plain tensors/containers,
    # avoiding arbitrary-code execution from untrusted checkpoints; it is
    # also the default behaviour from torch 2.6 onward.
    state_dict = torch.load(
        model_path, map_location=torch.device("cpu"), weights_only=True
    )
    model.load_state_dict(state_dict)
    model.eval()
    return model
45
+
# Load the trained checkpoint once at module level so every request reuses it.
model = load_model("music_model.pth")
47
+
48
# πŸ” Prediction function
def predict(image):
    """Classify an instrument image.

    Args:
        image: Image as a numpy array (as delivered by the Gradio input).

    Returns:
        A tuple of (predicted class name, {class name: probability}).
    """
    image = Image.fromarray(image).convert("RGB")
    img_tensor = transform(image).unsqueeze(0)  # add the batch dimension
    with torch.no_grad():
        outputs = model(img_tensor)
        _, predicted = torch.max(outputs, 1)
        prediction = class_names[predicted.item()]
        confidences = torch.nn.functional.softmax(outputs[0], dim=0)
        # Iterate class_names instead of a hard-coded range(10) so the
        # mapping stays correct if the label list ever changes.
        confidences_dict = {
            name: float(confidences[i]) for i, name in enumerate(class_names)
        }
    return prediction, confidences_dict
59
+
60
# πŸŽ›οΈ Gradio UI wiring: one image input, two label outputs.
interface = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="numpy", label="Upload Instrument Image"),
    outputs=[
        gr.Label(label="Predicted Instrument"),
        gr.Label(label="Confidence Scores"),
    ],
    title="🎡 Musical Instrument Classifier",
    description=(
        "Upload an image of a musical instrument and get the predicted "
        "class (accordion, guitar, etc.)"
    ),
)
71
+
72
# πŸš€ Launch the app
# Start the Gradio server only when executed as a script, not on import.
if __name__ == "__main__":
    interface.launch()
music_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec98307f0073d0f85cfc07741f70c1669d4e875d188c739264cf74f3fbc20a6c
3
+ size 45321530
music_model2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e51d1804a88148ea0acfef630517d68c210a5daeb52ea146a3607d3847f8bf51
3
+ size 44805752
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio>=4.0.0
2
+ torch>=2.0.0
3
+ torchvision>=0.15.0
4
+ Pillow>=9.0.0