Spaces:

Marek4321
/

QualiLab

Sleeping

App Files Community

Marek4321 committed on Jul 9, 2025

Commit

97cca2b

verified ·

1 Parent(s): ae0710d

Update transcription.py

Browse files

Files changed (1) hide show

transcription.py +216 -116

transcription.py CHANGED Viewed

@@ -1,9 +1,9 @@
-# transcription.py - Moduł transkrypcji audio używając OpenAI Whisper
 import os
 import time
 import streamlit as st
-from typing import List, Dict, Optional
 from pathlib import Path
 try:
@@ -32,11 +32,15 @@ class AudioTranscriber:
             'total_cost_estimate': 0
         }
-    def transcribe_files(self, file_paths: List[str], language: str = "pl") -> str:
         """
-        Transkrypcja listy plików audio
         Returns: Połączona transkrypcja wszystkich plików
         """
         transcriptions = []
         for i, file_path in enumerate(file_paths):
@@ -49,8 +53,8 @@ class AudioTranscriber:
                 if len(file_paths) > 1:
                     st.info(f"🎙️ Transkrybuję część {i+1}/{len(file_paths)}")
-                # Transkrypcja pojedynczego pliku
-                transcription = self._transcribe_single_file(file_path, language)
                 if transcription:
                     transcriptions.append(transcription)
@@ -68,12 +72,9 @@ class AudioTranscriber:
         if transcriptions:
             # Jeśli było więcej niż jeden plik, dodaj separatory
             if len(transcriptions) > 1:
-                final_transcription = "\n\n=== CZĘŚĆ 1 ===\n\n".join([
-                    transcriptions[0]
-                ] + [
-                    f"=== CZĘŚĆ {i+1} ===\n\n{text}"
-                    for i, text in enumerate(transcriptions[1:], 1)
-                ])
             else:
                 final_transcription = transcriptions[0]
@@ -81,6 +82,41 @@ class AudioTranscriber:
         else:
             raise Exception("Wszystkie transkrypcje zakończone błędem")
     def _transcribe_single_file(self, file_path: str, language: str = "pl") -> Optional[str]:
         """Transkrypcja pojedynczego pliku"""
         try:
@@ -94,16 +130,32 @@ class AudioTranscriber:
             if file_size_mb > 25:
                 raise Exception(f"Plik za duży dla Whisper API: {file_size_mb:.1f}MB > 25MB")
             st.info(f"📤 Wysyłam do Whisper ({file_size_mb:.1f}MB)...")
             # Otwórz plik i wyślij do API
             with open(file_path, 'rb') as audio_file:
-                transcript = self.client.audio.transcriptions.create(
-                    model=MODEL_SETTINGS['whisper']['model'],
-                    file=audio_file,
-                    language=language if language != 'auto' else None,
-                    temperature=MODEL_SETTINGS['whisper']['temperature']
-                )
             # Estymacja kosztu (Whisper API: $0.006 per minute)
             estimated_duration = file_size_mb * 60  # Rough estimate: 1MB ≈ 1 minute
@@ -111,72 +163,49 @@ class AudioTranscriber:
             self.transcription_stats['total_duration'] += estimated_duration
             self.transcription_stats['total_cost_estimate'] += estimated_cost
-            st.success(f"✅ Transkrypcja otrzymana (~{estimated_duration:.1f}s audio)")
-            return transcript.text
         except Exception as e:
             st.error(f"❌ Błąd Whisper API: {str(e)}")
-            # Jeśli błąd rate limit, poczekaj i spróbuj ponownie
-            if "rate limit" in str(e).lower():
-                st.warning("⏳ Rate limit - czekam 60s i próbuję ponownie...")
-                time.sleep(60)
-                return self._transcribe_single_file(file_path, language)
-            return None
-    def transcribe_with_retries(self, file_path: str, language: str = "pl", max_retries: int = 3) -> Optional[str]:
-        """Transkrypcja z ponawianiem przy błędach"""
-        for attempt in range(max_retries):
-            try:
-                result = self._transcribe_single_file(file_path, language)
-                if result:
-                    return result
-            except Exception as e:
-                st.warning(f"⚠️ Próba {attempt + 1}/{max_retries} nieudana: {str(e)}")
-                if attempt < max_retries - 1:
-                    wait_time = (attempt + 1) * 30  # Exponential backoff
-                    st.info(f"⏳ Czekam {wait_time}s przed następną próbą...")
-                    time.sleep(wait_time)
-                else:
-                    st.error(f"❌ Wszystkie {max_retries} prób nieudane")
-        return None
-    def estimate_transcription_time(self, file_paths: List[str]) -> Dict:
-        """Estymuj czas i koszt transkrypcji"""
-        total_size = sum(os.path.getsize(path) for path in file_paths if os.path.exists(path))
-        total_size_mb = total_size / (1024 * 1024)
-        # Estymacje
-        estimated_duration_minutes = total_size_mb  # 1MB ≈ 1 minute
-        estimated_api_time = estimated_duration_minutes * 0.1  # Whisper jest ~10x szybszy niż realtime
-        estimated_cost = estimated_duration_minutes * 0.006  # $0.006 per minute
-        return {
-            'total_size_mb': total_size_mb,
-            'estimated_audio_duration': estimated_duration_minutes,
-            'estimated_processing_time': estimated_api_time,
-            'estimated_cost_usd': estimated_cost,
-            'files_count': len(file_paths)
-        }
-    def validate_api_key(self) -> bool:
-        """Sprawdź czy klucz API działa"""
-        try:
-            # Spróbuj pobrać listę modeli
-            models = self.client.models.list()
-            return True
         except Exception as e:
-            st.error(f"❌ Nieprawidłowy klucz API: {str(e)}")
-            return False
-    def get_transcription_stats(self) -> Dict:
-        """Zwróć statystyki transkrypcji"""
-        return self.transcription_stats.copy()
     def detect_interview_type(self, transcription: str) -> str:
         """
@@ -189,13 +218,15 @@ class AudioTranscriber:
         fgi_indicators = [
             'moderator', 'grupa', 'wszyscy', 'kto jeszcze', 'a państwo',
             'czy zgadzacie się', 'co myślicie', 'focus group',
-            'uczestnicy', 'grupa fokusowa', 'dyskusja grupowa'
         ]
         # Wskaźniki IDI (Individual)
         idi_indicators = [
             'wywiad indywidualny', 'jeden na jeden', 'prywatnie',
-            'osobiście', 'indywidualne', 'w cztery oczy'
         ]
         fgi_score = sum(1 for indicator in fgi_indicators if indicator in text_lower)
@@ -203,67 +234,121 @@ class AudioTranscriber:
         # Sprawdź także liczbę różnych głosów/osób
         # (FGI zwykle ma więcej przerywników, overlapping speech)
-        interruption_patterns = ['...', '[', ']', '(', ')', '--']
         interruption_count = sum(text_lower.count(pattern) for pattern in interruption_patterns)
-        if fgi_score > idi_score and interruption_count > 10:
             return 'fgi'
-        elif idi_score > fgi_score:
             return 'idi'
-        elif interruption_count > 20:  # Dużo przerywników = prawdopodobnie grupa
             return 'fgi'
         else:
             return 'unknown'
-    def clean_transcription(self, transcription: str) -> str:
-        """Oczyszczenie i formatowanie transkrypcji"""
         try:
-            # Usuń nadmiarowe spacje
-            lines = transcription.split('\n')
-            cleaned_lines = []
-            for line in lines:
-                line = line.strip()
-                if line:  # Pomijaj puste linie
-                    # Usuń nadmiarowe spacje
-                    line = ' '.join(line.split())
-                    cleaned_lines.append(line)
-            # Połącz z pojedynczymi przerwami linii
-            cleaned = '\n\n'.join(cleaned_lines)
-            # Dodaj informacje metadata na początek
-            metadata = f"""TRANSKRYPCJA AUDIO
-Data: {time.strftime('%Y-%m-%d %H:%M')}
-Typ: {self.detect_interview_type(cleaned).upper()}
-Długość: ~{len(cleaned.split())} słów
----
-"""
-            return metadata + cleaned
         except Exception as e:
-            st.warning(f"⚠️ Błąd czyszczenia transkrypcji: {e}")
-            return transcription
-# Funkcje pomocnicze dla kompatybilności
-def validate_audio_file(file_path: str) -> bool:
     """Sprawdź czy plik audio jest prawidłowy"""
     if not os.path.exists(file_path):
-        return False
     # Sprawdź rozmiar
     file_size = os.path.getsize(file_path)
     if file_size == 0:
-        return False
     # Sprawdź rozszerzenie
     valid_extensions = ['.mp3', '.wav', '.mp4', '.m4a', '.aac']
     file_ext = Path(file_path).suffix.lower()
-    return file_ext in valid_extensions
 # Test modułu
 if __name__ == "__main__":
@@ -275,12 +360,27 @@ if __name__ == "__main__":
         print("✅ AudioTranscriber zainicjalizowany")
         # Test rozpoznania typu wywiadu
-        test_fgi = "Moderator: Co wszyscy myślicie o produkcie? Czy zgadzacie się z tym?"
-        test_idi = "Interviewer: A teraz opowiedz mi o swoich doświadczeniach..."
         print(f"Test FGI: {transcriber.detect_interview_type(test_fgi)}")
         print(f"Test IDI: {transcriber.detect_interview_type(test_idi)}")
     except Exception as e:
         print(f"❌ Błąd testu: {e}")

+# transcription.py - Poprawiony moduł transkrypcji
 import os
 import time
 import streamlit as st
+from typing import List, Dict, Optional, Tuple, Union
 from pathlib import Path
 try:
             'total_cost_estimate': 0
         }
+    def transcribe_files(self, file_paths: Union[str, List[str]], language: str = "pl") -> str:
         """
+        Transkrypcja listy plików audio lub pojedynczego pliku
         Returns: Połączona transkrypcja wszystkich plików
         """
+        # Obsługa pojedynczego pliku
+        if isinstance(file_paths, str):
+            file_paths = [file_paths]
         transcriptions = []
         for i, file_path in enumerate(file_paths):
                 if len(file_paths) > 1:
                     st.info(f"🎙️ Transkrybuję część {i+1}/{len(file_paths)}")
+                # Transkrypcja pojedynczego pliku z retry
+                transcription = self.transcribe_with_retries(file_path, language)
                 if transcription:
                     transcriptions.append(transcription)
         if transcriptions:
             # Jeśli było więcej niż jeden plik, dodaj separatory
             if len(transcriptions) > 1:
+                final_transcription = transcriptions[0]
+                for i, text in enumerate(transcriptions[1:], 1):
+                    final_transcription += f"\n\n=== CZĘŚĆ {i+1} ===\n\n{text}"
             else:
                 final_transcription = transcriptions[0]
         else:
             raise Exception("Wszystkie transkrypcje zakończone błędem")
+    def transcribe_with_retries(self, file_path: str, language: str = "pl", max_retries: int = 3) -> Optional[str]:
+        """Transkrypcja z ponawianiem przy błędach"""
+        for attempt in range(max_retries):
+            try:
+                # Sprawdź rozmiar pliku przed każdą próbą
+                file_size_mb = os.path.getsize(file_path) / (1024 * 1024)
+                if file_size_mb > 25:
+                    raise Exception(f"Plik za duży dla Whisper API: {file_size_mb:.1f}MB > 25MB")
+                result = self._transcribe_single_file(file_path, language)
+                if result:
+                    return result
+            except Exception as e:
+                error_msg = str(e).lower()
+                st.warning(f"⚠️ Próba {attempt + 1}/{max_retries} nieudana: {str(e)}")
+                if attempt < max_retries - 1:
+                    # Exponential backoff z różnymi strategiami
+                    if "rate limit" in error_msg:
+                        wait_time = 60 + (attempt * 30)  # Rate limit = długa przerwa
+                        st.info(f"⏳ Rate limit - czekam {wait_time}s...")
+                    elif "timeout" in error_msg:
+                        wait_time = 30 + (attempt * 15)  # Timeout = średnia przerwa
+                        st.info(f"⏳ Timeout - czekam {wait_time}s...")
+                    else:
+                        wait_time = 15 + (attempt * 10)  # Inne błędy = krótka przerwa
+                        st.info(f"⏳ Błąd - czekam {wait_time}s...")
+                    time.sleep(wait_time)
+                else:
+                    st.error(f"❌ Wszystkie {max_retries} prób nieudane dla {file_path}")
+        return None
     def _transcribe_single_file(self, file_path: str, language: str = "pl") -> Optional[str]:
         """Transkrypcja pojedynczego pliku"""
         try:
             if file_size_mb > 25:
                 raise Exception(f"Plik za duży dla Whisper API: {file_size_mb:.1f}MB > 25MB")
+            # Sprawdź czy plik nie jest pusty
+            if file_size == 0:
+                raise Exception("Plik jest pusty")
             st.info(f"📤 Wysyłam do Whisper ({file_size_mb:.1f}MB)...")
             # Otwórz plik i wyślij do API
             with open(file_path, 'rb') as audio_file:
+                # Ustaw parametry transkrypcji
+                params = {
+                    'model': MODEL_SETTINGS['whisper']['model'],
+                    'file': audio_file,
+                    'temperature': MODEL_SETTINGS['whisper']['temperature'],
+                    'response_format': 'text'  # Zwróć tylko tekst
+                }
+                # Dodaj język tylko jeśli nie jest auto
+                if language != 'auto':
+                    params['language'] = language
+                # Wywołaj API
+                transcript = self.client.audio.transcriptions.create(**params)
+            # Sprawdź czy otrzymaliśmy wynik
+            if not transcript or len(transcript.strip()) == 0:
+                raise Exception("Pusty wynik transkrypcji")
             # Estymacja kosztu (Whisper API: $0.006 per minute)
             estimated_duration = file_size_mb * 60  # Rough estimate: 1MB ≈ 1 minute
             self.transcription_stats['total_duration'] += estimated_duration
             self.transcription_stats['total_cost_estimate'] += estimated_cost
+            st.success(f"✅ Transkrypcja otrzymana ({len(transcript.split())} słów)")
+            # Oczyść i zwróć transkrypcję
+            return self.clean_transcription(transcript)
         except Exception as e:
             st.error(f"❌ Błąd Whisper API: {str(e)}")
+            raise e
+    def clean_transcription(self, transcription: str) -> str:
+        """Oczyszczenie i formatowanie transkrypcji"""
+        try:
+            # Usuń nadmiarowe spacje i znaki
+            cleaned = transcription.strip()
+            # Usuń nadmiarowe spacje
+            cleaned = ' '.join(cleaned.split())
+            # Podziel na akapity w rozsądnych miejscach
+            sentences = cleaned.split('. ')
+            paragraphs = []
+            current_paragraph = []
+            for sentence in sentences:
+                current_paragraph.append(sentence)
+                # Nowy akapit co 3-4 zdania
+                if len(current_paragraph) >= 4:
+                    paragraphs.append('. '.join(current_paragraph) + '.')
+                    current_paragraph = []
+            # Dodaj ostatni akapit
+            if current_paragraph:
+                paragraphs.append('. '.join(current_paragraph))
+            # Połącz akapity
+            formatted = '\n\n'.join(paragraphs)
+            return formatted
         except Exception as e:
+            st.warning(f"⚠️ Błąd formatowania transkrypcji: {e}")
+            return transcription
     def detect_interview_type(self, transcription: str) -> str:
         """
         fgi_indicators = [
             'moderator', 'grupa', 'wszyscy', 'kto jeszcze', 'a państwo',
             'czy zgadzacie się', 'co myślicie', 'focus group',
+            'uczestnicy', 'grupa fokusowa', 'dyskusja grupowa',
+            'co sądzicie', 'może ktoś inny', 'a jak pan/pani'
         ]
         # Wskaźniki IDI (Individual)
         idi_indicators = [
             'wywiad indywidualny', 'jeden na jeden', 'prywatnie',
+            'osobiście', 'indywidualne', 'w cztery oczy',
+            'tylko między nami', 'powiedz mi', 'jak się czujesz'
         ]
         fgi_score = sum(1 for indicator in fgi_indicators if indicator in text_lower)
         # Sprawdź także liczbę różnych głosów/osób
         # (FGI zwykle ma więcej przerywników, overlapping speech)
+        interruption_patterns = ['...', '[niewyraźnie]', '[nakładanie się głosów]', '(śmiech)', '--']
         interruption_count = sum(text_lower.count(pattern) for pattern in interruption_patterns)
+        # Sprawdź długość - FGI są zwykle dłuższe
+        word_count = len(transcription.split())
+        # Logika decyzyjna
+        if fgi_score > idi_score * 1.5 and word_count > 1000:
             return 'fgi'
+        elif idi_score > fgi_score * 1.5:
             return 'idi'
+        elif interruption_count > 10 and word_count > 1500:
             return 'fgi'
+        elif word_count < 800:
+            return 'idi'
         else:
             return 'unknown'
+    def validate_api_key(self) -> bool:
+        """Sprawdź czy klucz API działa"""
         try:
+            # Spróbuj pobrać listę modeli
+            models = self.client.models.list()
+            # Sprawdź czy whisper-1 jest dostępny
+            model_names = [model.id for model in models.data]
+            if 'whisper-1' not in model_names:
+                st.warning("⚠️ Model whisper-1 nie jest dostępny")
+                return False
+            return True
         except Exception as e:
+            st.error(f"❌ Nieprawidłowy klucz API: {str(e)}")
+            return False
+    def get_transcription_stats(self) -> Dict:
+        """Zwróć statystyki transkrypcji"""
+        stats = self.transcription_stats.copy()
+        # Dodaj dodatkowe metryki
+        if stats['total_files'] > 0:
+            stats['success_rate'] = (stats['successful'] / stats['total_files']) * 100
+        else:
+            stats['success_rate'] = 0
+        return stats
+    def estimate_transcription_time(self, file_paths: List[str]) -> Dict:
+        """Estymuj czas i koszt transkrypcji"""
+        valid_files = [path for path in file_paths if os.path.exists(path)]
+        if not valid_files:
+            return {
+                'error': 'Brak prawidłowych plików',
+                'files_count': 0
+            }
+        total_size = sum(os.path.getsize(path) for path in valid_files)
+        total_size_mb = total_size / (1024 * 1024)
+        # Estymacje
+        estimated_duration_minutes = total_size_mb  # 1MB ≈ 1 minute
+        estimated_api_time = estimated_duration_minutes * 0.1  # Whisper jest ~10x szybszy
+        estimated_cost = estimated_duration_minutes * 0.006  # $0.006 per minute
+        # Sprawdź limity
+        files_too_large = []
+        for path in valid_files:
+            file_size_mb = os.path.getsize(path) / (1024 * 1024)
+            if file_size_mb > 25:
+                files_too_large.append((path, file_size_mb))
+        return {
+            'total_size_mb': total_size_mb,
+            'estimated_audio_duration': estimated_duration_minutes,
+            'estimated_processing_time': estimated_api_time,
+            'estimated_cost_usd': estimated_cost,
+            'files_count': len(valid_files),
+            'files_too_large': files_too_large
+        }
+# Funkcje pomocnicze
+def validate_audio_file(file_path: str) -> Tuple[bool, str]:
     """Sprawdź czy plik audio jest prawidłowy"""
     if not os.path.exists(file_path):
+        return False, "Plik nie istnieje"
     # Sprawdź rozmiar
     file_size = os.path.getsize(file_path)
+    file_size_mb = file_size / (1024 * 1024)
     if file_size == 0:
+        return False, "Plik jest pusty"
+    if file_size_mb > 25:
+        return False, f"Plik za duży: {file_size_mb:.1f}MB > 25MB"
     # Sprawdź rozszerzenie
     valid_extensions = ['.mp3', '.wav', '.mp4', '.m4a', '.aac']
     file_ext = Path(file_path).suffix.lower()
+    if file_ext not in valid_extensions:
+        return False, f"Nieobsługiwane rozszerzenie: {file_ext}"
+    return True, "OK"
+def get_file_duration_estimate(file_path: str) -> float:
+    """Estymuj długość pliku audio w minutach"""
+    try:
+        file_size_mb = os.path.getsize(file_path) / (1024 * 1024)
+        # Przybliżenie: 1MB ≈ 1 minuta dla typowego MP3
+        return file_size_mb
+    except:
+        return 0.0
 # Test modułu
 if __name__ == "__main__":
         print("✅ AudioTranscriber zainicjalizowany")
         # Test rozpoznania typu wywiadu
+        test_fgi = """
+        Moderator: Dzień dobry wszystkim. Co wszyscy myślicie o tym produkcie?
+        Uczestnik 1: Ja uważam, że...
+        Uczestnik 2: Ale czy zgadzacie się, że...
+        Moderator: A co sądzicie o tym?
+        """
+        test_idi = """
+        Interviewer: Opowiedz mi o swoich doświadczeniach z tym produktem.
+        Respondent: Moje doświadczenia są bardzo pozytywne...
+        Interviewer: A jak się czujesz gdy używasz tego produktu?
+        """
         print(f"Test FGI: {transcriber.detect_interview_type(test_fgi)}")
         print(f"Test IDI: {transcriber.detect_interview_type(test_idi)}")
+        # Test walidacji pliku
+        test_file = "test.mp3"
+        result, message = validate_audio_file(test_file)
+        print(f"Test walidacji: {result} - {message}")
     except Exception as e:
         print(f"❌ Błąd testu: {e}")