# FastAPI audio-processing service (Hugging Face Space)
# Pipeline: upload -> Whisper transcription -> wav2vec2 emotion recognition -> GPT summary.
| from fastapi import FastAPI, File, UploadFile | |
| import uvicorn | |
| import openai | |
| import torch | |
| import torchaudio | |
| import torchaudio.transforms as T | |
| from transformers import Wav2Vec2FeatureExtractor, AutoModelForAudioClassification | |
| import whisper | |
| import os | |
app = FastAPI()

# Load Whisper model for transcription ("small" checkpoint; downloaded on first run).
whisper_model = whisper.load_model("small")

# Load speech emotion recognition model (wav2vec2 fine-tuned on SUPERB emotion task;
# expects 16 kHz mono audio).
ser_model_name = "superb/wav2vec2-base-superb-er"
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(ser_model_name)
ser_model = AutoModelForAudioClassification.from_pretrained(ser_model_name)

# OpenAI API Key — read from the environment; ensure you set this in the
# terminal before running (openai.api_key is None if the variable is unset).
openai.api_key = os.getenv("OPENAI_API_KEY")
@app.post("/process_audio/")  # was defined but never registered as a route — the app exposed no endpoints
async def process_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file, detect the speaker's emotion, and summarize it.

    Pipeline: save the upload to disk -> decode with torchaudio -> Whisper
    transcription -> wav2vec2 emotion classification -> OpenAI summary.

    Returns:
        dict with "transcription", "emotion", and "summary" on success, or a
        dict with a single "error" key naming the first stage that failed.
    """
    audio_path = "temp_audio.wav"
    try:
        print(f"β File received: {file.filename}")
        # Save the upload to a temp file so torchaudio/Whisper can read it from disk.
        with open(audio_path, "wb") as f:
            f.write(await file.read())
        print("β Audio saved successfully!")

        # Stage 1: verify the file decodes as audio.
        try:
            waveform, sample_rate = torchaudio.load(audio_path)
            print(f"β Audio loaded! Shape: {waveform.shape}, Sample Rate: {sample_rate}")
        except Exception as e:
            return {"error": f"β Audio loading failed: {e}"}

        # Stage 2: Whisper transcription (reads the file itself, handles resampling internally).
        try:
            transcription = whisper_model.transcribe(audio_path)["text"]
            print(f"β Whisper Transcription: {transcription}")
        except Exception as e:
            return {"error": f"β Whisper failed: {e}"}

        # Stage 3: speech emotion recognition.
        try:
            # Downmix multi-channel audio to mono and resample to the 16 kHz
            # rate the SER model was trained on.
            if waveform.shape[0] > 1:
                waveform = torch.mean(waveform, dim=0, keepdim=True)
            if sample_rate != 16000:
                waveform = T.Resample(sample_rate, 16000)(waveform)

            # Fix: the feature extractor expects raw 1-D audio, not a
            # (channels, time) tensor — squeeze the channel dim and convert to numpy.
            inputs = feature_extractor(
                waveform.squeeze(0).numpy(),
                sampling_rate=16000,
                return_tensors="pt",
                padding=True,
            )
            with torch.no_grad():
                logits = ser_model(**inputs).logits
            predicted_class = torch.argmax(logits, dim=-1).item()

            # Fix: use the model's own label map instead of a hard-coded list.
            # superb/wav2vec2-base-superb-er predicts 4 classes (neu/hap/ang/sad),
            # so the previous hand-written 7-emotion list mislabeled predictions.
            emotion_detected = ser_model.config.id2label.get(predicted_class, "unknown")
            print(f"β Emotion Detected: {emotion_detected}")
        except Exception as e:
            return {"error": f"β Emotion recognition failed: {e}"}

        # Stage 4: summarize the transcript via the OpenAI API (openai<1.0 interface).
        try:
            summary_response = openai.ChatCompletion.create(
                model="gpt-4-turbo",
                messages=[
                    {"role": "system", "content": "Summarize the following text."},
                    {"role": "user", "content": transcription},
                ],
            )
            summary = summary_response["choices"][0]["message"]["content"]
            print(f"β OpenAI Summary: {summary}")
        except Exception as e:
            return {"error": f"β OpenAI Summarization failed: {e}"}

        return {
            "transcription": transcription,
            "emotion": emotion_detected,
            "summary": summary,
        }
    except Exception as e:
        print(f"β Error in process_audio: {e}")
        return {"error": str(e)}
    finally:
        # Fix: the temp file was previously never deleted; remove it on every path.
        if os.path.exists(audio_path):
            os.remove(audio_path)
if __name__ == "__main__":
    # Run the API directly with uvicorn; bind to all interfaces on port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)