# Emotion_Project / app.py
# Author: pmikh26 — commit 2f1e061 ("Update app.py", verified)
import os
import pickle
import numpy as np
import pandas as pd
import gradio as gr
import soundfile as sf
from faster_whisper import WhisperModel
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# Sentence-embedding model; must match the model used to build the centroids.
EMBED_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
CENTROIDS_PATH = "emotion_avg.pkl"

# Load the precomputed per-emotion average embeddings (label -> vector) and
# normalize every value to an ndarray up front.
with open(CENTROIDS_PATH, "rb") as f:
    emotion_avg = pickle.load(f)
emotion_avg = {label: np.array(vec) for label, vec in emotion_avg.items()}
EMOTIONS = list(emotion_avg.keys())

# Models are instantiated once at import time so each request reuses them.
embedder = SentenceTransformer(EMBED_MODEL_NAME)
whisper_model = WhisperModel("base", compute_type="int8")
def predict_emotion_sentence(sentence):
    """Classify one sentence by cosine similarity to the emotion centroids.

    Returns a dict with the best-matching emotion label, its similarity
    score, and the margin over the runner-up emotion.
    """
    vec = embedder.encode([sentence], convert_to_numpy=True)[0].reshape(1, -1)
    labels = list(EMOTIONS)
    sims = [
        cosine_similarity(vec, emotion_avg[label].reshape(1, -1))[0][0]
        for label in labels
    ]
    # Rank emotions by similarity, highest first.
    ranked = np.argsort(sims)[::-1]
    top = ranked[0]
    runner_up = ranked[1] if len(ranked) > 1 else ranked[0]
    return {
        "emotion": labels[top],
        "score": float(sims[top]),
        "margin": float(sims[top] - sims[runner_up]),
    }
def analyze_audio(audio_path):
    """Transcribe an audio file and classify each segment's emotion.

    Parameters
    ----------
    audio_path : str | None
        Filesystem path supplied by the Gradio ``Audio`` component
        (``type="filepath"``), or ``None`` when nothing was provided.

    Returns
    -------
    tuple
        ``(transcript, latest_emotion, latest_margin, df)`` where ``df``
        always carries the four analysis columns, even when empty.
    """
    columns = ["sentence", "emotion", "score", "margin"]
    if audio_path is None:
        return "No transcript yet.", "None", 0.0, pd.DataFrame(columns=columns)

    segments, _ = whisper_model.transcribe(audio_path)
    transcript_parts = []
    rows = []
    for seg in segments:
        text = seg.text.strip()
        if not text:
            # Skip whitespace-only segments produced by the ASR model.
            continue
        transcript_parts.append(text)
        pred = predict_emotion_sentence(text)
        rows.append({
            "sentence": text,
            "emotion": pred["emotion"],
            "score": pred["score"],
            "margin": pred["margin"],
        })

    transcript = " ".join(transcript_parts).strip()
    if rows:
        latest_emotion = rows[-1]["emotion"]
        latest_margin = rows[-1]["margin"]
    else:
        latest_emotion = "None"
        latest_margin = 0.0

    # Bug fix: pass columns explicitly so an empty transcription still yields
    # a DataFrame with the expected headers (pd.DataFrame([]) has none,
    # which was inconsistent with the None branch above).
    df = pd.DataFrame(rows, columns=columns)
    return transcript, latest_emotion, latest_margin, df
# Gradio UI: audio in on the left; transcript, latest emotion, and the
# per-sentence analysis table on the right. The top-level Blocks object is
# named `demo` so Hugging Face Spaces can auto-discover it.
with gr.Blocks(title="Emotion Speech Classifier") as demo:
    gr.Markdown("# Emotion Speech Classifier")
    gr.Markdown("Upload or record audio, transcribe it, and detect sentence-level emotion.")

    with gr.Row():
        with gr.Column(scale=1):
            # type="filepath" hands analyze_audio a path string, not raw samples.
            audio_in = gr.Audio(
                sources=["microphone", "upload"],
                type="filepath",
                label="Audio Input",
            )
            analyze_button = gr.Button("Analyze Audio")
        with gr.Column(scale=2):
            transcript_out = gr.Textbox(label="Transcript", lines=8)
            with gr.Row():
                emotion_out = gr.Textbox(label="Latest Emotion")
                margin_out = gr.Number(label="Match Margin")
            table_out = gr.Dataframe(
                headers=["sentence", "emotion", "score", "margin"],
                label="Sentence Analysis",
            )

    # Output order must match analyze_audio's 4-tuple return.
    analyze_button.click(
        fn=analyze_audio,
        inputs=audio_in,
        outputs=[transcript_out, emotion_out, margin_out, table_out],
    )

if __name__ == "__main__":
    demo.launch()