Spaces: Runtime error
import gradio as gr
import numpy as np
import librosa, joblib, os
from PIL import Image
from sklearn.ensemble import RandomForestClassifier
from transformers import pipeline
# ── 1) Train / load the voice model ──
# NOTE: BASE_VOICE_PATH must point to folders of labeled .wav files on the machine
# running the app; a hard-coded Windows path will not exist inside a Spaces container.
BASE_VOICE_PATH = r"C:\情緒"
VOICE_MODEL_FILE = "voice_model.joblib"
def train_voice_model():
    labels = ["angry", "happy", "sad", "fear", "surprise"]
    X, y = [], []
    for lbl in labels:
        folder = os.path.join(BASE_VOICE_PATH, lbl)
        if not os.path.isdir(folder):
            raise FileNotFoundError(f"Folder not found: {folder}")
        for fname in os.listdir(folder):
            if fname.lower().endswith(".wav"):
                path = os.path.join(folder, fname)
                audio, sr = librosa.load(path, sr=None)
                # 13 MFCCs averaged over time give one fixed-length feature vector per clip
                mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
                mfcc_mean = np.mean(mfccs.T, axis=0)
                X.append(mfcc_mean)
                y.append(lbl)
    clf = RandomForestClassifier(n_estimators=100, random_state=42)
    clf.fit(X, y)
    joblib.dump(clf, VOICE_MODEL_FILE)
    return clf
# Reuse a previously saved model if one exists; otherwise train from the wav folders.
if os.path.exists(VOICE_MODEL_FILE):
    voice_clf = joblib.load(VOICE_MODEL_FILE)
else:
    voice_clf = train_voice_model()
def analyze_audio(path):
    # Extract the same 13-MFCC mean feature used at training time.
    audio, sr = librosa.load(path, sr=None)
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
    mfcc_mean = np.mean(mfccs.T, axis=0).reshape(1, -1)
    return voice_clf.predict(mfcc_mean)[0]
# ── 2) Chinese text sentiment analysis ──
def analyze_text(text):
    # Simple substring matching against common Chinese emotion keywords.
    if any(w in text for w in ["開心", "快樂"]):       return "happy"     # happy / joyful
    if any(w in text for w in ["生氣", "憤怒"]):       return "angry"     # angry / furious
    if any(w in text for w in ["傷心", "難過", "哭"]): return "sad"       # sad / upset / cry
    if any(w in text for w in ["驚", "意外"]):         return "surprise"  # startled / unexpected
    if any(w in text for w in ["怕", "恐懼"]):         return "fear"      # afraid / dread
    return "neutral"
# ── 3) Real-time facial emotion analysis (using the Hugging Face ferplus model) ──
face_classifier = pipeline(
    "image-classification",
    model="nateraw/ferplus",
    device=-1  # CPU
)

def analyze_face(img):
    # Gradio's webcam component delivers a numpy array by default, while the
    # image-classification pipeline expects a PIL image (or a path/URL), so convert.
    if isinstance(img, np.ndarray):
        img = Image.fromarray(img)
    result = face_classifier(img, top_k=1)[0]
    return result["label"]
# ── 4) Build the Gradio multi-tab interface ──
with gr.Blocks() as demo:
    gr.Markdown("# Multimodal Emotion Analysis Demo")
    with gr.Tab("📝 Text"):
        txt = gr.Textbox(placeholder="Enter Chinese text…")
        btn_txt = gr.Button("Analyze text")
        out_txt = gr.Textbox()
        btn_txt.click(analyze_text, inputs=txt, outputs=out_txt)
    with gr.Tab("🎤 Voice"):
        aud = gr.Audio(type="filepath")
        btn_aud = gr.Button("Analyze voice")
        out_aud = gr.Textbox()
        btn_aud.click(analyze_audio, inputs=aud, outputs=out_aud)
    with gr.Tab("📷 Face"):
        # "source" is the Gradio 3.x argument; Gradio 4+ renamed it to sources=["webcam"].
        img_cam = gr.Image(source="webcam")
        btn_img = gr.Button("Analyze expression")
        out_img = gr.Textbox()
        btn_img.click(analyze_face, inputs=img_cam, outputs=out_img)

demo.launch()
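For reference, the import list above implies roughly the following requirements.txt for the Space. This is a sketch inferred from the code, not a verified configuration (the transformers pipeline also needs a backend such as torch, and exact versions are assumptions left unpinned):

gradio
numpy
librosa
scikit-learn
joblib
transformers
torch
pillow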