Spaces:

GannaEslam38
/

OFOQ-AI-Engine

Sleeping

App Files Files Community

OFOQ-AI-Engine / utils.py

GannaEslam38

Update utils.py

e3a77d9 verified about 1 month ago

raw

history blame contribute delete

3.24 kB

	import torch
	import librosa
	import numpy as np
	import io
	from pydub import AudioSegment
	from transformers import (
	AutoFeatureExtractor,
	AutoModelForAudioClassification,
	pipeline,
	AutoTokenizer,
	AutoModelForSequenceClassification
	)

	# Pick the compute device once; free Hugging Face Spaces run on CPU by default.
	device = "cpu"
	if torch.cuda.is_available():
	    device = "cuda"
	print(f"🚀 OFOQ Engine is running on: {device.upper()}")

	# 1. Load every model from the Hugging Face Hub (instead of local folders).
	print("⏳ Loading OFOQ Models from Hugging Face Hub...")

	# --- Whisper detection: fine-tuned audio classifier and its feature extractor ---
	model_path_detection = "GannaEslam38/OFOQ-Whisper-Detection"
	feature_extractor = AutoFeatureExtractor.from_pretrained(model_path_detection)
	detection_model = AutoModelForAudioClassification.from_pretrained(model_path_detection)
	detection_model = detection_model.to(device)

	# --- Arabic Whisper: speech-to-text pipeline ---
	# Pipelines take a device index here: 0 when CUDA is available, -1 otherwise.
	whisper_pipeline = pipeline(
	    task="automatic-speech-recognition",
	    model="MohamedRashad/Arabic-Whisper-CodeSwitching-Edition",
	    device=-1 if not torch.cuda.is_available() else 0,
	)

	# --- MARBERT cheating classifier: fine-tuned sequence classifier ---
	model_path_cheating = "GannaEslam38/OFOQ-Cheating-Classifier"
	bert_tokenizer = AutoTokenizer.from_pretrained(model_path_cheating)
	bert_model = AutoModelForSequenceClassification.from_pretrained(model_path_cheating)
	bert_model = bert_model.to(device)

	# Wrap the already-loaded model and tokenizer in a text-classification pipeline.
	text_classifier = pipeline(
	    task="text-classification",
	    model=bert_model,
	    tokenizer=bert_tokenizer,
	    device=-1 if not torch.cuda.is_available() else 0,
	)

	def run_ofoq_logic(audio_bytes):
	    """Run the three-phase OFOQ check on one encoded audio clip.

	    Phases:
	        1. Decode the clip, resample it to 16 kHz and classify it with
	           ``detection_model``; predicted class 0 ends the run as "Normal".
	        2. Peak-normalise the waveform and transcribe it with
	           ``whisper_pipeline`` (language forced to Arabic).
	        3. Classify the transcript with ``text_classifier``.

	    Args:
	        audio_bytes: Raw bytes of an audio file that
	            ``pydub.AudioSegment.from_file`` can decode.

	    Returns:
	        dict: Always contains ``"status"``, which is one of:
	            * ``"Error"`` - decoding failed or produced no samples; the
	              dict also carries ``"message"``.
	            * ``"Normal"`` - the detector predicted class 0 (``"label": 0``).
	            * ``"Silent_Whisper"`` - the detector fired but the transcript
	              was empty (``"label": 0``, ``"whisper_text": ""``).
	            * ``"Cheating"`` / ``"Safe_Whisper"`` - text classifier verdict,
	              with ``"whisper_text"``, ``"confidence"``, ``"label"`` (1/0)
	              and ``"device_used"``.

	    Raises:
	        Exception: Errors raised by the models during phases 1-3 are not
	            caught here and propagate to the caller.
	    """
	    # Single source for the rate handed to the loader and the feature extractor.
	    sample_rate = 16000

	    try:
	        # Decode with pydub, re-encode as WAV in memory, then load at 16 kHz.
	        audio_stream = io.BytesIO(audio_bytes)
	        audio_segment = AudioSegment.from_file(audio_stream)

	        wav_io = io.BytesIO()
	        audio_segment.export(wav_io, format="wav")
	        wav_io.seek(0)

	        audio_data, _ = librosa.load(wav_io, sr=sample_rate)
	    except Exception as e:
	        print(f"❌ Audio Processing Error: {e}")
	        return {"status": "Error", "message": f"Could not process audio format: {e}"}

	    # A clip that decodes to zero samples would make np.max() below raise
	    # ValueError (zero-size reduction); report it like any other decode failure.
	    if audio_data.size == 0:
	        print("❌ Audio Processing Error: decoded audio contains no samples")
	        return {
	            "status": "Error",
	            "message": "Could not process audio format: decoded audio contains no samples",
	        }

	    # Phase 1: Whisper/Hiss detection
	    inputs = feature_extractor(
	        audio_data, sampling_rate=sample_rate, return_tensors="pt"
	    ).to(device)

	    with torch.no_grad():
	        logits = detection_model(**inputs).logits
	    prediction_label = torch.argmax(logits, dim=-1).item()

	    if prediction_label == 0:
	        return {"status": "Normal", "label": 0}

	    # Phase 2: Speech-to-text via Whisper
	    # Scale to unit peak; the epsilon keeps all-zero audio from dividing by zero.
	    boosted_audio = audio_data / (np.max(np.abs(audio_data)) + 1e-9)

	    stt_res = whisper_pipeline(boosted_audio, generate_kwargs={"language": "arabic"})
	    raw_text = stt_res["text"].strip()

	    # Phase 3: Intent classification (Cheating vs Safe)
	    if not raw_text:
	        return {"status": "Silent_Whisper", "label": 0, "whisper_text": ""}

	    # truncation=True keeps an over-long transcript from exceeding the
	    # classifier's maximum sequence length and raising inside the model.
	    bert_res = text_classifier(raw_text, truncation=True)[0]

	    # NOTE(review): relies on the classifier reporting its positive class as
	    # "LABEL_1"; if the model config defines custom label names this is
	    # always 0 - confirm against the model's id2label mapping.
	    is_cheating = 1 if bert_res["label"] == "LABEL_1" else 0

	    return {
	        "status": "Cheating" if is_cheating == 1 else "Safe_Whisper",
	        "whisper_text": raw_text,
	        "confidence": float(bert_res["score"]),
	        "label": is_cheating,
	        "device_used": device,
	    }