# Audio analysis pipeline: Whisper ASR + YAMNet sound-event classification.
import csv

import librosa
import numpy as np
import tensorflow_hub as hub
import whisper
# --- Module-level model setup (runs once at import time) ---

# Whisper "small" ASR model.
# NOTE(review): downloads weights on first use — confirm acceptable at import time.
asr_model = whisper.load_model("small")

# YAMNet audio-event classifier from TF-Hub.
yamnet = hub.load("https://tfhub.dev/google/yamnet/1")
# NOTE(review): this is the *path* (bytes) to the model's class-map CSV,
# not the class names themselves — callers must parse the CSV to get names.
class_map = yamnet.class_map_path().numpy()
| # Simple Emotion Estimator (from YAMNet embedding) | |
def estimate_emotion(activation):
    """Map a mean activation value to a coarse emotion label.

    Thresholds: mean > 0.3 -> "Happy / Excited";
    mean < -0.3 -> "Sad / Depressed"; otherwise "Neutral".
    The branches are mutually exclusive, so evaluation order is free.
    """
    avg = float(activation.mean())
    if avg < -0.3:
        return "Sad / Depressed"
    return "Happy / Excited" if avg > 0.3 else "Neutral"
def speech_to_text(audio):
    """Transcribe an audio file with the module-level Whisper model.

    Returns the "text" field of the Whisper transcription result.
    """
    return asr_model.transcribe(audio)["text"]
def detect_sound(audio):
    """Classify the dominant sound event in an audio file with YAMNet.

    Parameters
    ----------
    audio : str or path-like
        Path to an audio file readable by librosa.

    Returns
    -------
    tuple
        (display name of the top-scoring class, its mean score).
    """
    # YAMNet expects a mono rank-1 float waveform at 16 kHz.
    waveform, _sr = librosa.load(audio, sr=16000)
    # Bug fix: the TF-Hub YAMNet signature takes a 1-D waveform; the old
    # reshape(1, -1) fed it a rank-2 batch it does not accept.
    scores, _embeddings, _ = yamnet(waveform)

    # Average per-frame class scores over time, then pick the best class.
    mean_scores = np.mean(scores.numpy(), axis=0)
    top_idx = int(np.argmax(mean_scores))

    # Bug fix: `class_map` holds the *path* to the class-map CSV, not the
    # class names — indexing it returned a single byte of the path string.
    with open(class_map.decode("utf-8"), newline="") as f:
        class_names = [row["display_name"] for row in csv.DictReader(f)]
    return class_names[top_idx], mean_scores.max()
def analyze_audio(audio_file):
    """Run the full pipeline on one audio file and summarize the results.

    Combines transcription (Whisper) and sound-event detection (YAMNet)
    into a single summary dict. Emotion and speaker fields are fixed
    placeholders in this version.
    """
    transcription = speech_to_text(audio_file)
    event, confidence = detect_sound(audio_file)
    return {
        "transcription": transcription,
        "sound_event": event,
        "sound_confidence": float(confidence),
        "emotion": "Neutral (approx)",
        "speakers": "Not available in HF-free version",
    }