Spaces:

hrlima
/

emotion-api

Sleeping

App Files Files Community

hrlima committed on Nov 28, 2025

Commit

3d0e4a1

verified ·

1 Parent(s): a44c51b

Update app.py

Browse files

Files changed (1) hide show

app.py +78 -97

app.py CHANGED Viewed

@@ -26,27 +26,55 @@ try:
 except Exception as e:
     print(f"❌ Erro ao inicializar Firebase: {e}")
-# ====== PIPELINE: Apenas o modelo de áudio (Whisper fine-tuned) ======
 try:
     audio_pipeline = pipeline(
         task="audio-classification",
-        model="firdhokk/speech-emotion-recognition-with-openai-whisper-large-v3"
     )
-    print("✅ audio_pipeline carregado.")
 except Exception as e:
-    print(f"❌ Erro ao carregar audio_pipeline: {e}")
     audio_pipeline = None
 # ====== MAPEAMENTO DE EMOÇÕES (ING->PT) ======
 emotion_labels = {
     "angry": "raiva",
     "disgust": "insegurança",
     "fearful": "ansiedade",
     "fear": "ansiedade",
     "happy": "alegria",
     "neutral": "neutro",
     "sad": "tristeza",
     "surprised": "surpreso",
 }
 # ====== SUGESTÕES ======
@@ -63,7 +91,7 @@ def gerar_sugestao(emotion_pt):
     }
     return sugestoes.get(emotion_pt, "Mantenha o equilíbrio emocional e cuide de você mesmo.")
-# ====== FALLBACK APRIMORADO COM PALAVRAS-CHAVE (mantido) ======
 EMOTION_KEYWORDS = {
     "tristeza": ["triste","desanimado","melancólico","chateado","solitário","deprimido","abatido","infeliz","desmotivado"],
     "ansiedade": ["ansioso","preocupado","nervoso","tenso","inquieto","aflito","alarmado","sobrecarregado","inseguro","apreensivo"],
@@ -89,52 +117,7 @@ def fallback_emotion(text):
         "debug": "Fallback ativado"
     }
-# ====== AJUSTE HÍBRIDO (mantido) ======
-def hybrid_emotion(text, result):
-    text_lower = (text or "").lower()
-    detected = result.get("emotion", "neutro")
-    max_matches = 0
-    for emo, keywords in EMOTION_KEYWORDS.items():
-        matches = sum(2 for w in keywords if w in text_lower)
-        if matches > max_matches:
-            max_matches = matches
-            if emo != detected:
-                detected = emo
-    confidence = result.get("confidence", 0.0)
-    if detected != result.get("emotion"):
-        confidence = 0.7 + max_matches * 0.05
-    confidence = min(confidence, 1.0)
-    return {
-        "status": "ok",
-        "emotion": detected,
-        "emode": [detected],
-        "confidence": round(confidence, 2),
-        "probabilities": result.get("probabilities", {detected: 1.0}),
-        "suggestion": result.get("suggestion", gerar_sugestao(detected)),
-        "debug": result.get("debug", "Híbrido aplicado")
-    }
-# ====== HELPERS PARA ÁUDIO ======
-def save_bytes_to_tempfile(bbytes, suffix=".wav"):
-    fd, path = tempfile.mkstemp(suffix=suffix)
-    os.close(fd)
-    with open(path, "wb") as f:
-        f.write(bbytes)
-    return path
-def fetch_url_to_tempfile(url):
-    r = requests.get(url, timeout=15)
-    r.raise_for_status()
-    content_type = r.headers.get("content-type", "")
-    suffix = ".wav"
-    if "mpeg" in content_type or "mp3" in content_type:
-        suffix = ".mp3"
-    return save_bytes_to_tempfile(r.content, suffix=suffix)
-# ====== UTIL: Softmax com temperatura para calibrar probabilidades ======
 def tempered_softmax(scores_dict, temperature=1.0):
     labels = list(scores_dict.keys())
     vals = np.array([scores_dict[l] for l in labels], dtype=float)
@@ -145,7 +128,6 @@ def tempered_softmax(scores_dict, temperature=1.0):
     probs = exps / np.sum(exps)
     return dict(zip(labels, probs))
-# ====== UTIL: média/união de probabilidades ======
 def average_probabilities(list_of_prob_dicts):
     all_keys = set()
     for d in list_of_prob_dicts:
@@ -158,13 +140,30 @@ def average_probabilities(list_of_prob_dicts):
             avg[k] += d.get(k, 0.0)
     n = len(list_of_prob_dicts)
     for k in avg:
-        avg[k] = avg[k] / n
     total = sum(avg.values()) or 1.0
     for k in avg:
-        avg[k] = avg[k] / total
     return avg
-# ====== ROTA DE ANÁLISE (apenas modelo firdhokk, precisão aumentada por ensemble interno) ======
 @app.route("/analyze", methods=["POST"])
 def analyze():
     try:
@@ -172,7 +171,7 @@ def analyze():
         audio_bytes = None
         data = None
-        # prioridade: arquivo multipart 'file'
         if "file" in request.files:
             f = request.files["file"]
             audio_bytes = f.read()
@@ -181,14 +180,12 @@ def analyze():
                 data = request.get_json(silent=True)
             except Exception:
                 data = None
             if data:
                 if "audio_base64" in data:
                     audio_bytes = base64.b64decode(data["audio_base64"])
                 elif "audio_url" in data:
                     audio_path = fetch_url_to_tempfile(data["audio_url"])
                 elif "text" in data and (not audio_bytes and not audio_path):
-                    # apenas texto -> fallback textual
                     return jsonify(fallback_emotion(data["text"]))
         if audio_bytes:
@@ -202,60 +199,50 @@ def analyze():
                 return jsonify(fallback_emotion(data["text"]))
             return jsonify({"error": "Modelo de áudio indisponível no momento."}), 500
-        # -------------------------
-        # EXECUTAR VÁRIAS PASSAGENS (ensemble interno)
-        # -------------------------
-        # lista de top_k para executar o pipeline (captura incertezas)
-        topk_list = [10, 15, 20]
-        run_probs = []  # armazenará dicts label->score para cada run (antes de softmax)
-        raw_runs = []   # debug: guardar raw_result para inspeção
         for topk in topk_list:
             try:
                 raw_result = audio_pipeline(audio_path, top_k=topk)
-                # normalizar formato: raw_result é lista de dicts [{'label':..., 'score':...}, ...]
                 probs = {}
                 for item in raw_result:
-                    label = item.get("label", "").lower()
-                    if label == "fear":
-                        label = "fearful"
-                    probs[label] = float(item.get("score", 0.0))
                 if probs:
                     run_probs.append(probs)
                     raw_runs.append({"top_k": topk, "raw": raw_result})
             except Exception as e:
-                # log e seguir para próximas tentativas (não interromper totalmente)
-                print(f"⚠️ audio_pipeline falhou no top_k={topk}: {e}")
         if not run_probs:
             return jsonify({"error": "Modelo não retornou rótulos em nenhuma tentativa."}), 500
-        # 1) média das probabilidades (por rótulo) entre as execuções
         avg_probs = average_probabilities(run_probs)
-        # 2) recalibrar com temperatura (temperatura menor -> mais "afiado")
-        temp = float(os.getenv("AUDIO_SOFTMAX_TEMP", 0.6))  # default 0.6 para maior precisão
-        calibrated_probs = tempered_softmax(avg_probs, temperature=temp)
-        # 3) opcional: aplicar pequena regra de confiança mínima para reduzir rótulos com prob insignificante
-        # (zero out labels abaixo threshold then renormalize)
-        min_prob_threshold = float(os.getenv("MIN_LABEL_PROB", 0.02))  # 2% por padrão
-        filtered = {k: v if v >= min_prob_threshold else 0.0 for k, v in calibrated_probs.items()}
         totalf = sum(filtered.values()) or 1.0
         normalized = {k: (v / totalf) for k, v in filtered.items()}
-        # escolher rótulo final
         top_label = max(normalized, key=normalized.get)
         top_score = normalized[top_label]
-        # map to portuguese
         emotion_pt = emotion_labels.get(top_label, "desconhecido")
-        # regra de negócio: tristeza muito forte -> depressão
-        if emotion_pt == "tristeza" and top_score >= float(os.getenv("DEPRESSION_THRESHOLD", 0.92)):
             emotion_pt = "depressão"
-        # montar probabilidades para output (mapeadas p/ pt)
         probabilities_pt = { emotion_labels.get(k, k): round(float(v), 3) for k, v in normalized.items() }
         base_result = {
@@ -266,21 +253,15 @@ def analyze():
             "probabilities": probabilities_pt,
             "suggestion": gerar_sugestao(emotion_pt),
             "debug": {
                 "runs": raw_runs,
                 "avg_probs": {k: round(float(v), 4) for k, v in avg_probs.items()},
-                "calibrated_probs": {k: round(float(v), 4) for k, v in calibrated_probs.items()},
-                "normalized_probs": {k: round(float(v), 4) for k, v in normalized.items()}
             }
         }
-        # permitir que cliente envie 'text' (override/híbrido) — mantido como opção leve
-        text_for_hybrid = None
-        if data and "text" in data:
-            text_for_hybrid = data["text"]
-        final_result = hybrid_emotion(text_for_hybrid, base_result) if text_for_hybrid else base_result
-        return jsonify(final_result)
     except Exception as e:
         print(f"❌ Erro na rota /analyze: {e}")
@@ -293,4 +274,4 @@ def analyze():
             pass
 if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=int(os.getenv("PORT", 7860)))

 except Exception as e:
     print(f"❌ Erro ao inicializar Firebase: {e}")
+# ====== TUNABLE SETTINGS (read from environment variables) ======
+# Recommended model for Portuguese (override it if you want an English checkpoint)
+AUDIO_SER_MODEL = os.getenv("AUDIO_SER_MODEL", "alefiury/wav2vec2-xls-r-300m-pt-br-spontaneous-speech-emotion-recognition")
+# E.g. set ENABLE_ASR=true to enable ASR (it may consume a lot of memory); "1", "true" and "yes" are accepted
+ENABLE_ASR = os.getenv("ENABLE_ASR", "false").lower() in ("1", "true", "yes")
+AUDIO_TOPK_RUNS = os.getenv("AUDIO_TOPK_RUNS", "10,15,20")  # comma-separated top_k values, e.g. "10,15,20"
+AUDIO_SOFTMAX_TEMP = float(os.getenv("AUDIO_SOFTMAX_TEMP", "0.6"))
+MIN_LABEL_PROB = float(os.getenv("MIN_LABEL_PROB", "0.02"))
+DEPRESSION_THRESHOLD = float(os.getenv("DEPRESSION_THRESHOLD", "0.92"))
+# ====== PIPELINE: SER model (fine-tuned wav2vec2); audio_pipeline stays None if loading fails ======
 try:
     audio_pipeline = pipeline(
         task="audio-classification",
+        model=AUDIO_SER_MODEL
     )
+    print(f"✅ audio_pipeline carregado: {AUDIO_SER_MODEL}")
 except Exception as e:
+    print(f"❌ Erro ao carregar audio_pipeline ({AUDIO_SER_MODEL}): {e}")
     audio_pipeline = None
+# Optional: ASR (disabled by default to save resources); only loaded when ENABLE_ASR is set
+asr_pipeline = None
+if ENABLE_ASR:
+    try:
+        asr_pipeline = pipeline(
+            task="automatic-speech-recognition",
+            model="openai/whisper-large-v3"
+        )
+        print("✅ asr_pipeline carregado (ENABLE_ASR=true).")
+    except Exception as e:
+        print(f"⚠️ ASR indisponível: {e}")
+        asr_pipeline = None
 # ====== EMOTION MAPPING (English model label -> Portuguese label) ======
+# NOTE: each model may use slightly different labels; they are all normalized to these Portuguese names
 emotion_labels = {
     "angry": "raiva",
+    "anger": "raiva",
     "disgust": "insegurança",
     "fearful": "ansiedade",
     "fear": "ansiedade",
     "happy": "alegria",
+    "joy": "alegria",
     "neutral": "neutro",
     "sad": "tristeza",
+    "sadness": "tristeza",
     "surprised": "surpreso",
+    "surprise": "surpreso",
 }
 # ====== SUGESTÕES ======
     }
     return sugestoes.get(emotion_pt, "Mantenha o equilíbrio emocional e cuide de você mesmo.")
+# ====== FALLBACK POR TEXTO ======
 EMOTION_KEYWORDS = {
     "tristeza": ["triste","desanimado","melancólico","chateado","solitário","deprimido","abatido","infeliz","desmotivado"],
     "ansiedade": ["ansioso","preocupado","nervoso","tenso","inquieto","aflito","alarmado","sobrecarregado","inseguro","apreensivo"],
         "debug": "Fallback ativado"
     }
+# ====== UTIL: softmax temperado ======
 def tempered_softmax(scores_dict, temperature=1.0):
     labels = list(scores_dict.keys())
     vals = np.array([scores_dict[l] for l in labels], dtype=float)
     probs = exps / np.sum(exps)
     return dict(zip(labels, probs))
 def average_probabilities(list_of_prob_dicts):
     all_keys = set()
     for d in list_of_prob_dicts:
             avg[k] += d.get(k, 0.0)
     n = len(list_of_prob_dicts)
     for k in avg:
+        avg[k] /= n
     total = sum(avg.values()) or 1.0
     for k in avg:
+        avg[k] /= total
     return avg
+# ====== HELPERS ÁUDIO ======
+def save_bytes_to_tempfile(bbytes, suffix=".wav"):
+    """Write ``bbytes`` to a freshly created temporary file and return its path.
+
+    The file is created with ``tempfile.mkstemp`` and is NOT deleted here;
+    the caller owns the returned path and is responsible for removing it.
+
+    Args:
+        bbytes: Raw bytes to persist.
+        suffix: File-name suffix for the temporary file (default ``".wav"``).
+
+    Returns:
+        Path of the temporary file that now holds ``bbytes``.
+
+    Raises:
+        OSError: If the temporary file cannot be created or written.
+    """
+    fd, path = tempfile.mkstemp(suffix=suffix)
+    try:
+        # Write through the descriptor mkstemp already opened instead of
+        # closing it and reopening the same path a second time.
+        with os.fdopen(fd, "wb") as f:
+            f.write(bbytes)
+    except BaseException:
+        # Do not leave an orphaned temp file behind when the write fails;
+        # the original error is always re-raised.
+        try:
+            os.remove(path)
+        except OSError:
+            pass
+        raise
+    return path
+def fetch_url_to_tempfile(url, max_bytes=None):
+    """Download ``url`` and store the response body in a temporary file.
+
+    The suffix of the temporary file is ``".mp3"`` when the response
+    ``Content-Type`` mentions ``mpeg`` or ``mp3`` and ``".wav"`` otherwise.
+
+    Args:
+        url: Address of the audio resource to download.
+        max_bytes: Optional upper bound on the body size. ``None`` (the
+            default) keeps the previous behavior of accepting any size.
+
+    Returns:
+        Path of the temporary file holding the downloaded bytes. The caller
+        is responsible for deleting it.
+
+    Raises:
+        requests.RequestException: On connection errors, timeouts or a
+            non-2xx status (via ``raise_for_status``).
+        ValueError: If ``max_bytes`` is set and the body exceeds it.
+    """
+    # SECURITY(review): the /analyze route forwards a client-supplied
+    # "audio_url" to this function, so this is a server-side request to an
+    # arbitrary address (SSRF). Consider restricting the allowed hosts and
+    # passing a max_bytes limit from the caller.
+    # The context manager guarantees the connection is released even when
+    # an error interrupts the download.
+    with requests.get(url, timeout=15, stream=True) as r:
+        r.raise_for_status()
+        # Header values may arrive in any letter case (e.g. "audio/MPEG").
+        content_type = r.headers.get("content-type", "").lower()
+        chunks = []
+        received = 0
+        for chunk in r.iter_content(chunk_size=65536):
+            received += len(chunk)
+            if max_bytes is not None and received > max_bytes:
+                raise ValueError(f"Áudio remoto excede o limite de {max_bytes} bytes.")
+            chunks.append(chunk)
+    suffix = ".wav"
+    if "mpeg" in content_type or "mp3" in content_type:
+        suffix = ".mp3"
+    return save_bytes_to_tempfile(b"".join(chunks), suffix=suffix)
+# ====== ROTA /analyze ======
 @app.route("/analyze", methods=["POST"])
 def analyze():
     try:
         audio_bytes = None
         data = None
+        # receber multipart/file ou json
         if "file" in request.files:
             f = request.files["file"]
             audio_bytes = f.read()
                 data = request.get_json(silent=True)
             except Exception:
                 data = None
             if data:
                 if "audio_base64" in data:
                     audio_bytes = base64.b64decode(data["audio_base64"])
                 elif "audio_url" in data:
                     audio_path = fetch_url_to_tempfile(data["audio_url"])
                 elif "text" in data and (not audio_bytes and not audio_path):
                     return jsonify(fallback_emotion(data["text"]))
         if audio_bytes:
                 return jsonify(fallback_emotion(data["text"]))
             return jsonify({"error": "Modelo de áudio indisponível no momento."}), 500
+        # ----- Ensemble interno: múltiplas runs com diferentes top_k -----
+        topk_list = [int(x) for x in AUDIO_TOPK_RUNS.split(",") if x.strip().isdigit()]
+        if not topk_list:
+            topk_list = [10, 15, 20]
+        run_probs = []
+        raw_runs = []
         for topk in topk_list:
             try:
                 raw_result = audio_pipeline(audio_path, top_k=topk)
                 probs = {}
+                # raw_result é lista de dicts
                 for item in raw_result:
+                    lbl = item.get("label", "").lower()
+                    if lbl == "fear":
+                        lbl = "fearful"
+                    probs[lbl] = float(item.get("score", 0.0))
                 if probs:
                     run_probs.append(probs)
                     raw_runs.append({"top_k": topk, "raw": raw_result})
             except Exception as e:
+                print(f"⚠️ audio_pipeline falhou top_k={topk}: {e}")
         if not run_probs:
             return jsonify({"error": "Modelo não retornou rótulos em nenhuma tentativa."}), 500
         avg_probs = average_probabilities(run_probs)
+        # recalibrar com temperatura (mais baixa => mais confiante)
+        calibrated = tempered_softmax(avg_probs, temperature=AUDIO_SOFTMAX_TEMP)
+        # filtrar rótulos fracos
+        filtered = {k: (v if v >= MIN_LABEL_PROB else 0.0) for k, v in calibrated.items()}
         totalf = sum(filtered.values()) or 1.0
         normalized = {k: (v / totalf) for k, v in filtered.items()}
         top_label = max(normalized, key=normalized.get)
         top_score = normalized[top_label]
         emotion_pt = emotion_labels.get(top_label, "desconhecido")
+        if emotion_pt == "tristeza" and top_score >= DEPRESSION_THRESHOLD:
             emotion_pt = "depressão"
         probabilities_pt = { emotion_labels.get(k, k): round(float(v), 3) for k, v in normalized.items() }
         base_result = {
             "probabilities": probabilities_pt,
             "suggestion": gerar_sugestao(emotion_pt),
             "debug": {
+                "model": AUDIO_SER_MODEL,
                 "runs": raw_runs,
                 "avg_probs": {k: round(float(v), 4) for k, v in avg_probs.items()},
+                "calibrated": {k: round(float(v), 4) for k, v in calibrated.items()},
+                "normalized": {k: round(float(v), 4) for k, v in normalized.items()}
             }
         }
+        return jsonify(base_result)
     except Exception as e:
         print(f"❌ Erro na rota /analyze: {e}")
             pass
 if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=int(os.getenv("PORT", 7860)))