Spaces:

SsebaA
/

x

Sleeping

App Files Files Community

SsebaA committed on Apr 20

Commit

a222a41

verified ·

1 Parent(s): d1a3550

Update models.py

Browse files

Files changed (1) hide show

models.py +129 -100

models.py CHANGED Viewed

@@ -1,125 +1,154 @@
 """
 VoiceNote AI - Models
-Mistral AI client (HTTP API) and Whisper ASR model
 """
 import logging
-import requests
 import torch
-from transformers import pipeline
 from config import Config
 logger = logging.getLogger(__name__)
 class MistralClient:
-    """Mistral AI HTTP API Client"""
     def __init__(self):
-        """Initialize Mistral client with HTTP API"""
-        self.api_key = Config.MISTRAL_API_KEY
-        self.api_url = Config.MISTRAL_API_URL
-        self.model = Config.MISTRAL_MODEL
-        if not self.api_key:
-            raise ValueError("MISTRAL_API_KEY not found in environment")
-        logger.info("Mistral client initialized with HTTP API")
     def generate(self, prompt: str, max_tokens: int = 500, temperature: float = 0.1) -> str:
         """
-        Generate text using Mistral AI API
         Args:
-            prompt: Input prompt
-            max_tokens: Maximum tokens to generate
-            temperature: Sampling temperature
         Returns:
-            Generated text
         """
         headers = {
             "Content-Type": "application/json",
-            "Authorization": f"Bearer {self.api_key}"
         }
         payload = {
-            "model": self.model,
-            "messages": [
-                {"role": "user", "content": prompt}
-            ],
             "max_tokens": max_tokens,
-            "temperature": temperature
         }
-        try:
-            response = requests.post(
-                self.api_url,
-                headers=headers,
-                json=payload,
-                timeout=30
-            )
-            response.raise_for_status()
-            result = response.json()
-            return result['choices'][0]['message']['content']
-        except requests.exceptions.RequestException as e:
-            logger.error(f"Mistral API error: {e}")
-            raise
-class ASRModel:
-    """Automatic Speech Recognition using Whisper"""
-    def __init__(self):
-        """Initialize Whisper ASR model"""
-        self.model_name = Config.ASR_MODEL_NAME
-        self.language = Config.ASR_LANGUAGE
-        self.device = Config.ASR_DEVICE
-        self.dtype = Config.ASR_DTYPE
-        logger.info(f"Loading ASR model: {self.model_name}")
-        # Convert dtype string to torch dtype
-        torch_dtype = torch.float32 if self.dtype == "float32" else torch.float16
-        # Load model on CPU with float32 to avoid GPU dtype issues
-        # Enable long-form transcription with chunk_length_s
-        self.pipe = pipeline(
-            "automatic-speech-recognition",
-            model=self.model_name,
-            device=self.device,
-            torch_dtype=torch_dtype,
-            chunk_length_s=30,  # Enable chunking for long audio (>30s)
-            return_timestamps=False  # Don't return timestamps, just text
         )
-        logger.info(f"ASR model loaded successfully on {self.device}")
-    def transcribe(self, audio_path: str) -> str:
-        """
-        Transcribe audio file to text
-        Args:
-            audio_path: Path to audio file
-        Returns:
-            Transcribed text
-        """
-        logger.info(f"Transcribing audio: {audio_path}")
-        try:
-            # Pass language in generate_kwargs, NOT in model initialization
-            result = self.pipe(
-                audio_path,
-                generate_kwargs={"language": self.language}
-            )
-            text = result["text"].strip()
-            logger.info(f"Transcription successful: {len(text)} characters")
-            return text
-        except Exception as e:
-            logger.error(f"Transcription error: {e}")
-            raise

 """
 VoiceNote AI - Models
+ASR (Whisper) and LLM (Mistral) clients + DeepL translation layer
 """
 import logging
 import torch
+import deepl
+import requests
+from transformers import pipeline as hf_pipeline
 from config import Config
 logger = logging.getLogger(__name__)
+# ══════════════════════════════════════════════════════════
+# ASR — KBLab fine-tuned Whisper for Swedish
+# ══════════════════════════════════════════════════════════
+class WhisperASR:
+    """
+    Swedish ASR using KBLab's fine-tuned Whisper model.
+    KBLab/whisper-large-v3-swedish is trained on Swedish speech corpora,
+    significantly outperforming openai/whisper-small on Swedish medical text.
+    Reference: Vesterbacka et al. (2025), 'Swedish Whispers'.
+    The model runs locally on ZeroGPU — no audio leaves the server.
+    Audio is split into 30-second chunks to avoid GPU memory issues.
+    """
+    def __init__(self):
+        self._pipe = None
+    def _load(self):
+        """Lazy-load the model on first call (ZeroGPU requires GPU context)."""
+        if self._pipe is None:
+            logger.info(f"Loading ASR model: {Config.ASR_MODEL_NAME}")
+            self._pipe = hf_pipeline(
+                task="automatic-speech-recognition",
+                model=Config.ASR_MODEL_NAME,
+                torch_dtype=torch.float16,
+                device="cuda",
+            )
+        return self._pipe
+    def transcribe(self, audio_path: str) -> str:
+        """
+        Transcribe a Swedish audio file to text.
+        Args:
+            audio_path: Path to audio file (wav/mp3/m4a)
+        Returns:
+            Transcribed Swedish text
+        """
+        pipe = self._load()
+        result = pipe(
+            audio_path,
+            generate_kwargs={"language": Config.ASR_LANGUAGE, "task": "transcribe"},
+            chunk_length_s=Config.ASR_CHUNK_LENGTH_S,
+            stride_length_s=Config.ASR_STRIDE_LENGTH_S,
+            return_timestamps=False,
+        )
+        return result["text"].strip()
+# ══════════════════════════════════════════════════════════
+# TRANSLATION — DeepL (Frankfurt, within EU)
+# ══════════════════════════════════════════════════════════
+class DeepLTranslator:
+    """
+    Translates anonymized Swedish text to English via DeepL API.
+    Why: Mistral AI has limited Swedish NLP capability. Zero-shot and
+    Chain-of-Thought prompting in Swedish often produces empty or
+    incorrect VIPS output. Translating to English first resolves this
+    while keeping all data within EU jurisdiction (DeepL Frankfurt).
+    Data flow remains GDPR-compliant:
+      Whisper [local GPU] → GDPR filter [local] → DeepL [Frankfurt 🇩🇪]
+      → Mistral [Paris 🇫🇷] → result
+    """
+    def __init__(self):
+        if not Config.DEEPL_API_KEY:
+            raise EnvironmentError("DEEPL_API_KEY saknas i HuggingFace Secrets.")
+        self._translator = deepl.Translator(Config.DEEPL_API_KEY)
+    def translate(self, swedish_text: str) -> str:
+        """
+        Translate Swedish text to English.
+        Args:
+            swedish_text: Anonymized Swedish patient text
+        Returns:
+            English translation
+        """
+        result = self._translator.translate_text(
+            swedish_text,
+            source_lang="SV",
+            target_lang="EN-US",
+        )
+        logger.info("DeepL translation completed (SV → EN)")
+        return result.text
+# ══════════════════════════════════════════════════════════
+# LLM — Mistral AI (Paris, within EU)
+# ══════════════════════════════════════════════════════════
 class MistralClient:
+    """
+    Mistral AI client for VIPS classification.
+    Mistral is based in Paris (France) — data stays within EU/GDPR.
+    """
     def __init__(self):
+        if not Config.MISTRAL_API_KEY:
+            raise EnvironmentError("MISTRAL_API_KEY saknas i HuggingFace Secrets.")
+        self._api_key = Config.MISTRAL_API_KEY
     def generate(self, prompt: str, max_tokens: int = 500, temperature: float = 0.1) -> str:
         """
+        Send a prompt to Mistral and return the generated text.
         Args:
+            prompt: Full prompt string (system + user content)
+            max_tokens: Maximum tokens in response
+            temperature: Sampling temperature (low = more deterministic)
         Returns:
+            Model response text
         """
         headers = {
+            "Authorization": f"Bearer {self._api_key}",
             "Content-Type": "application/json",
         }
         payload = {
+            "model": Config.MISTRAL_MODEL,
+            "messages": [{"role": "user", "content": prompt}],
             "max_tokens": max_tokens,
+            "temperature": temperature,
         }
+        response = requests.post(
+            "https://api.mistral.ai/v1/chat/completions",
+            headers=headers,
+            json=payload,
+            timeout=60,
         )
+        response.raise_for_status()
+        return response.json()["choices"][0]["message"]["content"].strip()