Spaces:
Sleeping
Sleeping
| import torch | |
| import librosa | |
| import numpy as np | |
| import io | |
| from pydub import AudioSegment | |
| from transformers import ( | |
| AutoFeatureExtractor, | |
| AutoModelForAudioClassification, | |
| pipeline, | |
| AutoTokenizer, | |
| AutoModelForSequenceClassification | |
| ) | |
# Device selection (Hugging Face Free Spaces use CPU by default).
# CUDA availability is probed once and reused for both the torch device
# string and the integer device index that `pipeline(...)` is given below.
_cuda_available = torch.cuda.is_available()
device = "cuda" if _cuda_available else "cpu"
_pipeline_device = 0 if _cuda_available else -1
print(f"🚀 OFOQ Engine is running on: {device.upper()}")

# 1. Load models from the Hugging Face Hub (replacing local folders).
print("⏳ Loading OFOQ Models from Hugging Face Hub...")

# --- Whisper detection (fine-tuned audio classifier) ---
model_path_detection = "GannaEslam38/OFOQ-Whisper-Detection"
feature_extractor = AutoFeatureExtractor.from_pretrained(model_path_detection)
detection_model = AutoModelForAudioClassification.from_pretrained(
    model_path_detection
)
detection_model = detection_model.to(device)

# --- Arabic Whisper (speech-to-text) ---
whisper_pipeline = pipeline(
    task="automatic-speech-recognition",
    model="MohamedRashad/Arabic-Whisper-CodeSwitching-Edition",
    device=_pipeline_device,
)

# --- MARBERT cheating classifier (fine-tuned text classifier) ---
model_path_cheating = "GannaEslam38/OFOQ-Cheating-Classifier"
bert_tokenizer = AutoTokenizer.from_pretrained(model_path_cheating)
bert_model = AutoModelForSequenceClassification.from_pretrained(
    model_path_cheating
)
bert_model = bert_model.to(device)
text_classifier = pipeline(
    task="text-classification",
    model=bert_model,
    tokenizer=bert_tokenizer,
    device=_pipeline_device,
)
def _decode_audio(audio_bytes, sample_rate):
    """Decode raw audio bytes into a waveform loaded at ``sample_rate`` Hz.

    The bytes are parsed with pydub, re-exported as WAV into an in-memory
    buffer, and that buffer is then loaded with librosa.

    Args:
        audio_bytes: Encoded audio file contents.
        sample_rate: Target sampling rate passed to ``librosa.load``.

    Returns:
        The waveform returned by ``librosa.load``.

    Raises:
        ValueError: If decoding succeeds but produces zero samples.
        Exception: Any error raised by pydub or librosa for input they
            cannot decode is propagated unchanged.
    """
    segment = AudioSegment.from_file(io.BytesIO(audio_bytes))
    wav_buffer = io.BytesIO()
    segment.export(wav_buffer, format="wav")
    wav_buffer.seek(0)
    waveform, _ = librosa.load(wav_buffer, sr=sample_rate)
    # An empty waveform would otherwise fail later, outside the caller's
    # error handling (e.g. np.max raises on a zero-size array).
    if np.size(waveform) == 0:
        raise ValueError("decoded audio contains no samples")
    return waveform


def run_ofoq_logic(audio_bytes):
    """Run the three-phase OFOQ audio analysis on one encoded audio clip.

    Phases:
        1. The clip is decoded and scored by the audio detection model.
           A predicted class of 0 ends the analysis with status "Normal".
        2. Otherwise the waveform is peak-normalised and transcribed by the
           Whisper pipeline with the language forced to Arabic.
        3. A non-empty transcript is scored by the text classifier; the
           label "LABEL_1" is treated as cheating, anything else as safe.

    Args:
        audio_bytes: Encoded audio file contents.

    Returns:
        A dict whose ``"status"`` is one of:
            * ``"Error"`` - decoding failed or produced no samples;
              ``"message"`` carries the reason.
            * ``"Normal"`` - detection model predicted class 0
              (``"label"`` is 0).
            * ``"Silent_Whisper"`` - transcription was empty
              (``"label"`` is 0, ``"whisper_text"`` is ``""``).
            * ``"Cheating"`` / ``"Safe_Whisper"`` - text classifier result,
              with ``"whisper_text"``, ``"confidence"``, ``"label"``
              (1 or 0) and ``"device_used"``.
    """
    sample_rate = 16000

    try:
        audio_data = _decode_audio(audio_bytes, sample_rate)
    except Exception as e:
        # Boundary handler: decoding can fail in many library-specific ways,
        # so every failure is reported to the caller as an "Error" result.
        print(f"❌ Audio Processing Error: {e}")
        return {"status": "Error", "message": f"Could not process audio format: {str(e)}"}

    # Phase 1: Whisper/Hiss detection
    inputs = feature_extractor(
        audio_data, sampling_rate=sample_rate, return_tensors="pt"
    ).to(device)
    with torch.no_grad():
        logits = detection_model(**inputs).logits
    prediction_label = torch.argmax(logits, dim=-1).item()
    if prediction_label == 0:
        return {"status": "Normal", "label": 0}

    # Phase 2: Speech-to-text via Whisper.
    # Scale so the peak magnitude is ~1; the epsilon avoids dividing by zero
    # on an all-silent clip.
    boosted_audio = audio_data / (np.max(np.abs(audio_data)) + 1e-9)
    stt_res = whisper_pipeline(boosted_audio, generate_kwargs={"language": "arabic"})
    raw_text = stt_res["text"].strip()

    # Phase 3: Intent classification (cheating vs safe)
    if not raw_text:
        return {"status": "Silent_Whisper", "label": 0, "whisper_text": ""}

    # truncation=True guards against transcripts longer than the classifier's
    # maximum sequence length, which would otherwise raise during inference.
    bert_res = text_classifier(raw_text, truncation=True)[0]
    is_cheating = 1 if bert_res['label'] == "LABEL_1" else 0
    return {
        "status": "Cheating" if is_cheating == 1 else "Safe_Whisper",
        "whisper_text": raw_text,
        "confidence": float(bert_res['score']),
        "label": is_cheating,
        "device_used": device,
    }