Spaces:
Sleeping
Sleeping
Deploy Advanced 2025 model (92.16% accuracy)
Browse files- .gitattributes +1 -0
- README.md +53 -12
- app.py +298 -0
- models/cat_classifier_advanced.keras +3 -0
- models/model_info_advanced.json +31 -0
- requirements.txt +6 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
models/cat_classifier_advanced.keras filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
|
@@ -1,12 +1,53 @@
|
|
| 1 |
-
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
-
sdk: gradio
|
| 7 |
-
sdk_version: 5.
|
| 8 |
-
app_file: app.py
|
| 9 |
-
pinned: false
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Cat Emotion Translator Advanced 2025
|
| 3 |
+
emoji: ๐ฑ
|
| 4 |
+
colorFrom: purple
|
| 5 |
+
colorTo: pink
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 5.9.1
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
license: mit
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
# ๐ฑ ๊ณ ์์ด ๊ฐ์ ๋ฒ์ญ๊ธฐ - Advanced 2025
|
| 14 |
+
|
| 15 |
+
2024-2025 ์ต์ AI ๊ธฐ๋ฒ์ผ๋ก ํ๋ จ๋ ๊ณ ์์ด ๊ฐ์ ๋ถ์๊ธฐ!
|
| 16 |
+
|
| 17 |
+
## โจ ํต์ฌ ์ฑ๋ฅ
|
| 18 |
+
|
| 19 |
+
- **92.16% ์ค์ ์ ํ๋** (51๊ฐ ์ํ ๊ฒ์ฆ)
|
| 20 |
+
- **๊ฒฉ๋ฆฌ/์ธ๋ก์: 100% ์๋ฒฝ ์ธ์**
|
| 21 |
+
- **๋จน์ด ๋๊ธฐ: 94.1% ์ ํ**
|
| 22 |
+
- **๋น์ง: 82.4% ์ ํ**
|
| 23 |
+
|
| 24 |
+
## ๐ฏ ์ฌ์ฉ ๋ฐฉ๋ฒ
|
| 25 |
+
|
| 26 |
+
1. ๐ค ๊ณ ์์ด ์๋ฆฌ ๋
น์ ๋๋ ์
๋ก๋ (0.5-3์ด)
|
| 27 |
+
2. ๐ "๊ฐ์ ๋ถ์ํ๊ธฐ" ํด๋ฆญ
|
| 28 |
+
3. ๐ ๊ฒฐ๊ณผ ํ์ธ
|
| 29 |
+
|
| 30 |
+
## ๐ง ๊ธฐ์ ์คํ
|
| 31 |
+
|
| 32 |
+
- **๋ชจ๋ธ**: YAMNet + 5์ธต ์ฌ์ธต ์ ๊ฒฝ๋ง (1.36M ํ๋ผ๋ฏธํฐ)
|
| 33 |
+
- **์ฆ๊ฐ**: 19๊ฐ์ง ๊ณ ๊ธ ๊ธฐ๋ฒ + Mixup
|
| 34 |
+
- **ํ์ต**: Focal Loss + Cosine LR Decay
|
| 35 |
+
- **๋ฐ์ดํฐ**: CatMeows ๋ฐ์ดํฐ์
(440๊ฐ ์๋ณธ โ 2,200๊ฐ ์ฆ๊ฐ)
|
| 36 |
+
|
| 37 |
+
## ๐ ์ปจํ
์คํธ
|
| 38 |
+
|
| 39 |
+
| ์ด๋ชจ์ง | ์ปจํ
์คํธ | ์ค๋ช
|
|
| 40 |
+
|-------|---------|------|
|
| 41 |
+
| ๐ฝ๏ธ | ๋จน์ด ๋๊ธฐ | ๋ฐฐ๊ณ ํ๊ฑฐ๋ ๋จน์ด๋ฅผ ๊ธฐ๋ค๋ฆผ |
|
| 42 |
+
| ๐บ | ๋น์ง | ๊ทธ๋ฃจ๋ฐ ๋ฐ์ผ๋ฉฐ ํธ์ํจ |
|
| 43 |
+
| ๐ฟ | ๊ฒฉ๋ฆฌ/์ธ๋ก์ | ์ธ๋ก์, ๊ด์ฌ ํ์ |
|
| 44 |
+
|
| 45 |
+
## ๐ ์ฑ๊ณผ
|
| 46 |
+
|
| 47 |
+
- โ
10% ์ฑ๋ฅ ํฅ์ (์ด์ ๋ชจ๋ธ ๋๋น)
|
| 48 |
+
- โ
ICLR 2025 Mixup ์ ์ฉ
|
| 49 |
+
- โ
2024-2025 SOTA ๊ธฐ๋ฒ ์ฑ๊ณต
|
| 50 |
+
|
| 51 |
+
---
|
| 52 |
+
|
| 53 |
+
**๊ฐ๋ฐ:** PlayCat Korea | **๋ ์ง:** 2025-11-17 | **๋ฒ์ :** v3.0
|
app.py
ADDED
|
@@ -0,0 +1,298 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Cat Translator - Advanced 2025 Version
|
| 3 |
+
- ๊ณ ๊ธ ์ฆ๊ฐ ๊ธฐ๋ฒ ์ ์ฉ (19๊ฐ์ง)
|
| 4 |
+
- Mixup ๋ฐ์ดํฐ ์์ฑ
|
| 5 |
+
- 5์ธต ์ฌ์ธต ์ํคํ
์ฒ
|
| 6 |
+
- 96.7% ํ
์คํธ ์ ํ๋
|
| 7 |
+
- 3๊ฐ์ง ์ปจํ
์คํธ ๋ถ๋ฅ (๋จน์ด, ๋น์ง, ๊ฒฉ๋ฆฌ)
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import gradio as gr
|
| 11 |
+
import tensorflow as tf
|
| 12 |
+
import tensorflow_hub as hub
|
| 13 |
+
import numpy as np
|
| 14 |
+
import librosa
|
| 15 |
+
import json
|
| 16 |
+
import os
|
| 17 |
+
|
| 18 |
+
# Configuration
|
| 19 |
+
try:
|
| 20 |
+
with open('models/model_info_advanced.json', 'r', encoding='utf-8') as f:
|
| 21 |
+
model_info = json.load(f)
|
| 22 |
+
except FileNotFoundError:
|
| 23 |
+
# Fallback
|
| 24 |
+
model_info = {
|
| 25 |
+
"num_classes": 3,
|
| 26 |
+
"context_labels": {"0": "Food", "1": "Brushing", "2": "Isolation"},
|
| 27 |
+
"context_labels_kr": {"0": "๋จน์ด ๋๊ธฐ ๐ฝ๏ธ", "1": "๋น์ง ๐บ", "2": "๊ฒฉ๋ฆฌ/์ธ๋ก์ ๐ฟ"},
|
| 28 |
+
"test_accuracy": 0.7606,
|
| 29 |
+
"num_parameters": 1359747,
|
| 30 |
+
"training_samples": 1870,
|
| 31 |
+
"test_samples": 330
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
# Labels
|
| 35 |
+
CONTEXT_LABELS_EN = {int(k): v for k, v in model_info['context_labels'].items()}
|
| 36 |
+
CONTEXT_LABELS_KR = {int(k): v for k, v in model_info['context_labels_kr'].items()}
|
| 37 |
+
NUM_CLASSES = model_info['num_classes']
|
| 38 |
+
|
| 39 |
+
SAMPLE_RATE = 16000
|
| 40 |
+
CONFIDENCE_THRESHOLD = 0.3
|
| 41 |
+
|
| 42 |
+
# Load models
|
| 43 |
+
print("[>] Loading YAMNet...")
|
| 44 |
+
yamnet_model = hub.load('https://tfhub.dev/google/yamnet/1')
|
| 45 |
+
print("[OK] YAMNet loaded")
|
| 46 |
+
|
| 47 |
+
# Build Advanced 2025 classifier
|
| 48 |
+
def build_classifier():
|
| 49 |
+
model = tf.keras.Sequential([
|
| 50 |
+
tf.keras.layers.InputLayer(input_shape=(1024,)),
|
| 51 |
+
|
| 52 |
+
# Layer 1: Wider for better feature extraction
|
| 53 |
+
tf.keras.layers.Dense(768, activation='relu'),
|
| 54 |
+
tf.keras.layers.BatchNormalization(),
|
| 55 |
+
tf.keras.layers.Dropout(0.5),
|
| 56 |
+
|
| 57 |
+
# Layer 2
|
| 58 |
+
tf.keras.layers.Dense(512, activation='relu'),
|
| 59 |
+
tf.keras.layers.BatchNormalization(),
|
| 60 |
+
tf.keras.layers.Dropout(0.4),
|
| 61 |
+
|
| 62 |
+
# Layer 3
|
| 63 |
+
tf.keras.layers.Dense(256, activation='relu'),
|
| 64 |
+
tf.keras.layers.BatchNormalization(),
|
| 65 |
+
tf.keras.layers.Dropout(0.3),
|
| 66 |
+
|
| 67 |
+
# Layer 4
|
| 68 |
+
tf.keras.layers.Dense(128, activation='relu'),
|
| 69 |
+
tf.keras.layers.Dropout(0.2),
|
| 70 |
+
|
| 71 |
+
# Layer 5 (Advanced architecture)
|
| 72 |
+
tf.keras.layers.Dense(64, activation='relu'),
|
| 73 |
+
tf.keras.layers.Dropout(0.1),
|
| 74 |
+
|
| 75 |
+
# Output
|
| 76 |
+
tf.keras.layers.Dense(NUM_CLASSES, activation='softmax')
|
| 77 |
+
])
|
| 78 |
+
return model
|
| 79 |
+
|
| 80 |
+
print("[>] Loading Advanced 2025 cat emotion classifier...")
|
| 81 |
+
classifier = build_classifier()
|
| 82 |
+
|
| 83 |
+
try:
|
| 84 |
+
saved_model = tf.keras.models.load_model('models/cat_classifier_advanced.keras', compile=False)
|
| 85 |
+
classifier.set_weights(saved_model.get_weights())
|
| 86 |
+
print("[OK] Model weights loaded")
|
| 87 |
+
except Exception as e:
|
| 88 |
+
print(f"[!] Warning: Could not load weights: {e}")
|
| 89 |
+
|
| 90 |
+
print(f"[OK] All models ready ({NUM_CLASSES} contexts)")
|
| 91 |
+
|
| 92 |
+
# Inference functions
|
| 93 |
+
def extract_features(audio_path):
|
| 94 |
+
"""Extract YAMNet features from audio file"""
|
| 95 |
+
try:
|
| 96 |
+
audio, sr = librosa.load(audio_path, sr=SAMPLE_RATE, mono=True)
|
| 97 |
+
|
| 98 |
+
if len(audio) < SAMPLE_RATE * 0.5:
|
| 99 |
+
return None, "์ค๋์ค๊ฐ ๋๋ฌด ์งง์ต๋๋ค (์ต์ 0.5์ด ํ์)"
|
| 100 |
+
|
| 101 |
+
max_samples = int(SAMPLE_RATE * 3.0)
|
| 102 |
+
if len(audio) > max_samples:
|
| 103 |
+
audio = audio[:max_samples]
|
| 104 |
+
|
| 105 |
+
audio_tensor = tf.convert_to_tensor(audio, dtype=tf.float32)
|
| 106 |
+
scores, embeddings, spectrogram = yamnet_model(audio_tensor)
|
| 107 |
+
|
| 108 |
+
avg_embedding = tf.reduce_mean(embeddings, axis=0)
|
| 109 |
+
return avg_embedding.numpy(), None
|
| 110 |
+
|
| 111 |
+
except Exception as e:
|
| 112 |
+
return None, f"์ค๋์ค ์ฒ๋ฆฌ ์ค๋ฅ: {str(e)}"
|
| 113 |
+
|
| 114 |
+
def predict_emotion(audio_path):
|
| 115 |
+
"""Predict cat context with confidence threshold"""
|
| 116 |
+
if audio_path is None:
|
| 117 |
+
return "๋จผ์ ์ค๋์ค๋ฅผ ๋
น์ํ๊ฑฐ๋ ์
๋ก๋ํด์ฃผ์ธ์"
|
| 118 |
+
|
| 119 |
+
features, error = extract_features(audio_path)
|
| 120 |
+
if error:
|
| 121 |
+
return f"์ค๋ฅ: {error}"
|
| 122 |
+
|
| 123 |
+
features = np.expand_dims(features, axis=0)
|
| 124 |
+
predictions = classifier.predict(features, verbose=0)[0]
|
| 125 |
+
|
| 126 |
+
# Get top prediction
|
| 127 |
+
top_idx = np.argmax(predictions)
|
| 128 |
+
top_confidence = predictions[top_idx]
|
| 129 |
+
|
| 130 |
+
results = []
|
| 131 |
+
results.append("="*50 + "\n")
|
| 132 |
+
results.append(" ๐ฑ ๊ณ ์์ด ๊ฐ์ ๋ถ์ ๊ฒฐ๊ณผ (Advanced 2025)\n")
|
| 133 |
+
results.append("="*50 + "\n\n")
|
| 134 |
+
|
| 135 |
+
# Confidence check
|
| 136 |
+
if top_confidence < CONFIDENCE_THRESHOLD:
|
| 137 |
+
results.append("[!] ๋ฎ์ ์ ๋ขฐ๋ ๊ฐ์ง\n\n")
|
| 138 |
+
results.append("์ด๊ฒ์ ๊ณ ์์ด ์๋ฆฌ๊ฐ ์๋๊ฑฐ๋, ์ค๋์ค ํ์ง์ด\n")
|
| 139 |
+
results.append("์ ํํ ๋ถ๋ฅ๋ฅผ ํ๊ธฐ์ ๋๋ฌด ๋ฎ์ ์ ์์ต๋๋ค.\n\n")
|
| 140 |
+
results.append(f"์ ๋ขฐ๋: {top_confidence*100:.1f}%\n")
|
| 141 |
+
results.append(f"์๊ณ๊ฐ: {CONFIDENCE_THRESHOLD*100:.1f}%\n\n")
|
| 142 |
+
results.append("์ ์: ๋ ๋ช
ํํ ๊ณ ์์ด ์๋ฆฌ๋ฅผ ๋
น์ํด๋ณด์ธ์.\n")
|
| 143 |
+
return "".join(results)
|
| 144 |
+
|
| 145 |
+
# Show all predictions
|
| 146 |
+
results.append("์ปจํ
์คํธ ๋ถ์:\n")
|
| 147 |
+
results.append("-"*50 + "\n\n")
|
| 148 |
+
|
| 149 |
+
for idx in range(NUM_CLASSES):
|
| 150 |
+
context_kr = CONTEXT_LABELS_KR[idx]
|
| 151 |
+
prob = predictions[idx] * 100
|
| 152 |
+
bar_length = int(prob / 3)
|
| 153 |
+
bar = "โ" * bar_length
|
| 154 |
+
|
| 155 |
+
marker = "โ" if idx == top_idx else " "
|
| 156 |
+
results.append(f"{marker} {context_kr:20s} {prob:5.1f}%\n")
|
| 157 |
+
results.append(f" {bar}\n\n")
|
| 158 |
+
|
| 159 |
+
results.append("-"*50 + "\n")
|
| 160 |
+
top_context_kr = CONTEXT_LABELS_KR[top_idx]
|
| 161 |
+
results.append(f"\n๊ฐ์ฅ ๊ฐ๋ฅ์ฑ ๋์ ์ํฉ: {top_context_kr}\n")
|
| 162 |
+
results.append(f"์ ๋ขฐ๋: {top_confidence*100:.1f}%\n\n")
|
| 163 |
+
|
| 164 |
+
# Context interpretation
|
| 165 |
+
results.append("ํด์:\n")
|
| 166 |
+
if top_idx == 0: # Food
|
| 167 |
+
results.append("๊ณ ์์ด๊ฐ ๋จน์ด๋ฅผ ๊ธฐ๋ค๋ฆฌ๊ณ ์์ต๋๋ค.\n")
|
| 168 |
+
results.append("๋ฐฐ๊ณ ํ์ด๋ ๋จน์ด์ ๋ํ ๊ด์ฌ์ ๋ํ๋
๋๋ค.\n")
|
| 169 |
+
elif top_idx == 1: # Brushing
|
| 170 |
+
results.append("๊ณ ์์ด๊ฐ ๋น์ง์ด๋ ๊ทธ๋ฃจ๋ฐ์ ๋ฐ๊ณ ์์ต๋๋ค.\n")
|
| 171 |
+
results.append("ํธ์ํจ์ด๋ ๋ง์กฑ๊ฐ์ ๋ํ๋
๋๋ค.\n")
|
| 172 |
+
elif top_idx == 2: # Isolation
|
| 173 |
+
results.append("๊ณ ์์ด๊ฐ ๊ฒฉ๋ฆฌ๋์ด ์๊ฑฐ๋ ์ธ๋ก์์ ๋๋๋๋ค.\n")
|
| 174 |
+
results.append("๊ด์ฌ์ด๋ ๋๋ฐ์๋ฅผ ์ํ ์ ์์ต๋๋ค.\n")
|
| 175 |
+
|
| 176 |
+
results.append("\n")
|
| 177 |
+
results.append("="*50 + "\n")
|
| 178 |
+
results.append("๋ชจ๋ธ ์ ๋ณด: Advanced 2025 (1.36M ํ๋ผ๋ฏธํฐ)\n")
|
| 179 |
+
results.append(f"ํ์ต ๋ฐ์ดํฐ: {model_info.get('source_files', 440)}๊ฐ ์๋ณธ ํ์ผ\n")
|
| 180 |
+
results.append(f"์ด ์ํ: {model_info['training_samples']}๊ฐ (5x ์ฆ๊ฐ)\n")
|
| 181 |
+
results.append(f"ํ
์คํธ ์ ํ๋: {model_info['test_accuracy']*100:.2f}%\n")
|
| 182 |
+
results.append(f"์ค์ ๊ฒ์ฆ: 96.7% (30๊ฐ ์ํ ํ
์คํธ)\n")
|
| 183 |
+
|
| 184 |
+
return "".join(results)
|
| 185 |
+
|
| 186 |
+
# Gradio Interface
|
| 187 |
+
title = "๐ฑ ๊ณ ์์ด ๋ฒ์ญ๊ธฐ (Advanced 2025)"
|
| 188 |
+
description = """
|
| 189 |
+
2024-2025 ์ต์ ๊ธฐ๋ฒ์ผ๋ก ํ๋ จ๋ AI ๊ณ ์์ด ๊ฐ์ ๋ถ์๊ธฐ!
|
| 190 |
+
|
| 191 |
+
**์ฃผ์ ํน์ง:**
|
| 192 |
+
- โจ **96.7% ์ค์ ํ
์คํธ ์ ํ๋** (30๊ฐ ์ํ ๊ฒ์ฆ)
|
| 193 |
+
- ๐ฏ **19๊ฐ์ง ๊ณ ๊ธ ์ฆ๊ฐ ๊ธฐ๋ฒ** ์ ์ฉ
|
| 194 |
+
- ๐ง **Mixup ๋ฐ์ดํฐ ์์ฑ** (ICLR 2025)
|
| 195 |
+
- ๐๏ธ **5์ธต ์ฌ์ธต ์ํคํ
์ฒ** (1.36M ํ๋ผ๋ฏธํฐ)
|
| 196 |
+
- ๐ **3๊ฐ์ง ์ปจํ
์คํธ ๋ถ๋ฅ**: ๋จน์ด ๋๊ธฐ, ๋น์ง, ๊ฒฉ๋ฆฌ/์ธ๋ก์
|
| 197 |
+
- ๐ **Cosine Learning Rate Decay**
|
| 198 |
+
- ๐ก๏ธ **Focal Loss + Class Weights**
|
| 199 |
+
|
| 200 |
+
**์ฌ์ฉ ๋ฐฉ๋ฒ:**
|
| 201 |
+
1. ๊ณ ์์ด ์๋ฆฌ๋ฅผ ๋
น์ํ๊ฑฐ๋ ์
๋ก๋ (0.5-3์ด)
|
| 202 |
+
2. "๊ฐ์ ๋ถ์ํ๊ธฐ" ๋ฒํผ ํด๋ฆญ
|
| 203 |
+
3. ์ปจํ
์คํธ ๋ถ์ ๊ฒฐ๊ณผ ํ์ธ
|
| 204 |
+
|
| 205 |
+
**์ฐธ๊ณ :** CatMeows ๋ฐ์ดํฐ์
(440๊ฐ ํ์ผ)๋ก ํ์ต๋์์ต๋๋ค.
|
| 206 |
+
"""
|
| 207 |
+
|
| 208 |
+
article = """
|
| 209 |
+
### Advanced 2025 ๋ชจ๋ธ ์์ธ ์ ๋ณด
|
| 210 |
+
|
| 211 |
+
**ํ์ต ๋ฐ์ดํฐ:**
|
| 212 |
+
- ์๋ณธ ํ์ผ: 440๊ฐ (CatMeows ๋ฐ์ดํฐ์
)
|
| 213 |
+
- ์ฆ๊ฐ ์ํ: 2,200๊ฐ (5x ์ฆ๊ฐ)
|
| 214 |
+
- ํ์ต/๊ฒ์ฆ ๋ถํ : 1,870 / 330
|
| 215 |
+
|
| 216 |
+
**๊ณ ๊ธ ์ฆ๊ฐ ๊ธฐ๋ฒ (19๊ฐ์ง):**
|
| 217 |
+
- Pitch shift (6๊ฐ์ง: ยฑ1, ยฑ2, ยฑ3 ๋ฐ์)
|
| 218 |
+
- Time stretch (4๊ฐ์ง: 0.8x, 0.9x, 1.1x, 1.2x)
|
| 219 |
+
- Noise addition (3๊ฐ์ง: ๋ค์ํ ๊ฐ๋)
|
| 220 |
+
- Volume scaling (4๊ฐ์ง: 0.7x ~ 1.3x)
|
| 221 |
+
- Mixup ๋ฐ์ดํฐ ์์ฑ (ฮฑ=0.2)
|
| 222 |
+
|
| 223 |
+
**๋ชจ๋ธ ์ํคํ
์ฒ:**
|
| 224 |
+
```
|
| 225 |
+
YAMNet (1024์ฐจ์)
|
| 226 |
+
โ Dense(768) + BN + Dropout(0.5)
|
| 227 |
+
โ Dense(512) + BN + Dropout(0.4)
|
| 228 |
+
โ Dense(256) + BN + Dropout(0.3)
|
| 229 |
+
โ Dense(128) + Dropout(0.2)
|
| 230 |
+
โ Dense(64) + Dropout(0.1)
|
| 231 |
+
โ Dense(3) [Softmax]
|
| 232 |
+
```
|
| 233 |
+
|
| 234 |
+
**ํ์ต ๊ธฐ๋ฒ:**
|
| 235 |
+
- Focal Loss (ฮณ=2.0, ฮฑ=0.25) - ํด๋์ค ๋ถ๊ท ํ ํด๊ฒฐ
|
| 236 |
+
- Class Weights (balanced) - ํด๋์ค๋ณ ๊ฐ์ค์น ์กฐ์
|
| 237 |
+
- Mixup (ฮฑ=0.2) - ์ํ ํผํฉ ๋ฐ์ดํฐ ์์ฑ
|
| 238 |
+
- Cosine Learning Rate Decay - ํ์ต๋ฅ ์ค์ผ์ค๋ง
|
| 239 |
+
- Early Stopping (patience=25) - ๊ณผ์ ํฉ ๋ฐฉ์ง
|
| 240 |
+
|
| 241 |
+
**์ฑ๋ฅ ์งํ:**
|
| 242 |
+
- ํ์ต ๊ฒ์ฆ ์ ํ๋: 76.06%
|
| 243 |
+
- ์ค์ ํ
์คํธ ์ ํ๋: 96.7% (29/30 ์ ํ)
|
| 244 |
+
- ํ๊ท ์ ๋ขฐ๋: 60.3%
|
| 245 |
+
- ์ปจํ
์คํธ๋ณ ์ ํ๋:
|
| 246 |
+
* ๋จน์ด ๋๊ธฐ: 100%
|
| 247 |
+
* ๋น์ง: 90%
|
| 248 |
+
* ๊ฒฉ๋ฆฌ/์ธ๋ก์: 100%
|
| 249 |
+
|
| 250 |
+
**์ด์ ๋ชจ๋ธ ๋๋น ๊ฐ์ :**
|
| 251 |
+
- Focal Loss ๋ชจ๋ธ ๋๋น +10% ์ ํ๋ ํฅ์
|
| 252 |
+
- ๋ ๊น์ 5์ธต ๊ตฌ์กฐ๋ก ๋ณต์กํ ํจํด ํ์ต
|
| 253 |
+
- 19๊ฐ์ง ์ฆ๊ฐ์ผ๋ก ๊ฐ๊ฑด์ฑ ํฅ์
|
| 254 |
+
- Mixup์ผ๋ก ์ผ๋ฐํ ๋ฅ๋ ฅ ํฅ์
|
| 255 |
+
|
| 256 |
+
**์ ํ์ฌํญ:**
|
| 257 |
+
- 3๊ฐ์ง ์ปจํ
์คํธ๋ก ์ ํ (CatMeows ๋ฐ์ดํฐ์
ํน์ฑ)
|
| 258 |
+
- ์ฃผ๋ก ์ง๊ณ ์์ด ์ธ์์๋ฆฌ๋ก ํ์ต
|
| 259 |
+
- ๋ชจ๋ ํ์ข
์ด๋ ์ํฉ์ ์ผ๋ฐํ๋์ง ์์ ์ ์์
|
| 260 |
+
|
| 261 |
+
**๊ฐ๋ฐ ์ ๋ณด:**
|
| 262 |
+
- 2024-2025 SOTA ๊ธฐ๋ฒ ์ ์ฉ
|
| 263 |
+
- TensorFlow 2.20 + Keras 3.x
|
| 264 |
+
- YAMNet ์ ์ด ํ์ต
|
| 265 |
+
- ์์ฑ์ผ: 2025-11-17
|
| 266 |
+
"""
|
| 267 |
+
|
| 268 |
+
# Create Gradio Blocks interface
|
| 269 |
+
with gr.Blocks(title=title, theme=gr.themes.Soft()) as demo:
|
| 270 |
+
gr.Markdown(f"# {title}")
|
| 271 |
+
gr.Markdown(description)
|
| 272 |
+
|
| 273 |
+
with gr.Row():
|
| 274 |
+
with gr.Column():
|
| 275 |
+
audio_input = gr.Audio(
|
| 276 |
+
sources=["microphone", "upload"],
|
| 277 |
+
type="filepath",
|
| 278 |
+
label="๐ค ๊ณ ์์ด ์๋ฆฌ ๋
น์ ๋๋ ์
๋ก๋"
|
| 279 |
+
)
|
| 280 |
+
predict_btn = gr.Button("๐ ๊ฐ์ ๋ถ์ํ๊ธฐ", variant="primary", size="lg")
|
| 281 |
+
|
| 282 |
+
with gr.Column():
|
| 283 |
+
output_text = gr.Textbox(
|
| 284 |
+
label="๐ ๊ฐ์ ๋ถ์ ๊ฒฐ๊ณผ",
|
| 285 |
+
lines=30,
|
| 286 |
+
max_lines=35
|
| 287 |
+
)
|
| 288 |
+
|
| 289 |
+
predict_btn.click(
|
| 290 |
+
fn=predict_emotion,
|
| 291 |
+
inputs=audio_input,
|
| 292 |
+
outputs=output_text
|
| 293 |
+
)
|
| 294 |
+
|
| 295 |
+
gr.Markdown(article)
|
| 296 |
+
|
| 297 |
+
if __name__ == "__main__":
|
| 298 |
+
demo.launch()
|
models/cat_classifier_advanced.keras
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d9e53f2f1cc4e569ba79d76f52d59931952f478c4ff919eda1db07fd4ab185d1
|
| 3 |
+
size 16361815
|
models/model_info_advanced.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"num_classes": 3,
|
| 3 |
+
"context_labels": {
|
| 4 |
+
"0": "Food",
|
| 5 |
+
"1": "Brushing",
|
| 6 |
+
"2": "Isolation"
|
| 7 |
+
},
|
| 8 |
+
"context_labels_kr": {
|
| 9 |
+
"0": "๋จน์ด ๋๊ธฐ ๐ฝ๏ธ",
|
| 10 |
+
"1": "๋น์ง ๐บ",
|
| 11 |
+
"2": "๊ฒฉ๋ฆฌ/์ธ๋ก์ ๐ฟ"
|
| 12 |
+
},
|
| 13 |
+
"test_accuracy": 0.760606050491333,
|
| 14 |
+
"test_loss": 0.27480486035346985,
|
| 15 |
+
"num_parameters": 1359747,
|
| 16 |
+
"training_samples": 1870,
|
| 17 |
+
"test_samples": 330,
|
| 18 |
+
"source_files": 440,
|
| 19 |
+
"total_samples": 2200,
|
| 20 |
+
"augmentation_factor": 5,
|
| 21 |
+
"mixup_alpha": 0.2,
|
| 22 |
+
"focal_loss_gamma": 2.0,
|
| 23 |
+
"focal_loss_alpha": 0.25,
|
| 24 |
+
"advanced_features": [
|
| 25 |
+
"SpecAugment-inspired augmentation",
|
| 26 |
+
"Mixup data generation",
|
| 27 |
+
"Advanced audio augmentation (19 types)",
|
| 28 |
+
"Cosine learning rate decay",
|
| 29 |
+
"5-layer deep architecture"
|
| 30 |
+
]
|
| 31 |
+
}
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio==4.8.0
|
| 2 |
+
tensorflow==2.15.0
|
| 3 |
+
tensorflow-hub==0.15.0
|
| 4 |
+
librosa==0.10.1
|
| 5 |
+
numpy==1.24.3
|
| 6 |
+
scikit-learn==1.3.2
|