Spaces:

UcsTurkey
/

flare

Paused

App Files Files Community

ciyidogan committed on Jul 11, 2025

Commit

78b5a88

verified ·

1 Parent(s): a4bca86

Update stt/stt_google.py

Browse files

Files changed (1) hide show

stt/stt_google.py +364 -503

stt/stt_google.py CHANGED Viewed

@@ -1,503 +1,364 @@
-"""
-Google Cloud Speech-to-Text Implementation
-"""
-import os
-import asyncio
-from typing import AsyncIterator, AsyncGenerator, Optional, List, Any
-import numpy as np
-from datetime import datetime
-import sys
-import queue
-import threading
-import time
-import traceback
-from utils.logger import log_info, log_error, log_debug, log_warning
-# Import Google Cloud Speech only if available
-try:
-    from google.cloud import speech
-    from google.api_core import exceptions
-    GOOGLE_SPEECH_AVAILABLE = True
-except ImportError:
-    GOOGLE_SPEECH_AVAILABLE = False
-    log_info("⚠️ Google Cloud Speech library not installed")
-from .stt_interface import STTInterface, STTConfig, TranscriptionResult
-class GoogleCloudSTT(STTInterface):
-    """Google Cloud Speech-to-Text implementation"""
-    def __init__(self, credentials_path: Optional[str] = None):
-        """Initialize Google Cloud STT"""
-        log_info("🎤 Creating STT provider: google")
-        # Initialize all required attributes
-        self.client = None
-        self.streaming_config = None
-        self.stream_thread = None
-        self.audio_queue = queue.Queue()
-        self.responses_queue = queue.Queue()
-        self.is_streaming = False
-        self.should_stop = False
-        self.error_message = None
-        self.session_id = 0
-        self.stream_start_time = None
-        # Additional attributes
-        self.lock = threading.Lock()
-        self.single_utterance = False
-        self.chunk_count = 0
-        self.total_bytes = 0
-        self.stop_event = threading.Event()
-        # Set Google credentials
-        if credentials_path:
-            if os.path.exists(credentials_path):
-                os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path
-                log_info(f"✅ Google credentials set from: {credentials_path}")
-            else:
-                log_error(f"❌ Credentials file not found: {credentials_path}")
-                raise ValueError(f"Google credentials file not found: {credentials_path}")
-        else:
-            # Fallback to environment variable
-            creds_path = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
-            if not creds_path:
-                creds_path = "./credentials/google-service-account.json"
-                if os.path.exists(creds_path):
-                    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = creds_path
-                    log_info(f"✅ Google credentials set from default: {creds_path}")
-                else:
-                    raise ValueError("Google credentials not found. Please provide credentials_path")
-        # Test credentials
-        try:
-            log_info("🔐 Testing Google credentials...")
-            test_client = speech.SpeechClient()
-            log_info("✅ Google credentials valid")
-        except Exception as e:
-            log_error(f"❌ Invalid Google credentials: {e}")
-            raise
-    def _get_encoding(self, encoding_str: str):
-        """Convert encoding string to Google Speech enum"""
-        if not GOOGLE_SPEECH_AVAILABLE:
-            return None
-        encoding_map = {
-            "WEBM_OPUS": speech.RecognitionConfig.AudioEncoding.WEBM_OPUS,
-            "LINEAR16": speech.RecognitionConfig.AudioEncoding.LINEAR16,
-            "FLAC": speech.RecognitionConfig.AudioEncoding.FLAC,
-            "MP3": speech.RecognitionConfig.AudioEncoding.MP3,
-            "OGG_OPUS": speech.RecognitionConfig.AudioEncoding.OGG_OPUS,
-        }
-        return encoding_map.get(encoding_str, speech.RecognitionConfig.AudioEncoding.WEBM_OPUS)
-    # Alias for compatibility
-    _get_google_encoding = _get_encoding
-    async def stream_audio(self, audio_chunk: bytes) -> AsyncGenerator[TranscriptionResult, None]:
-        """Stream audio chunk and get results"""
-        if not self.is_streaming:
-            raise Exception("Streaming not started")
-        try:
-            chunk_size = len(audio_chunk)
-            # Log first chunk details
-            if self.chunk_count == 0:
-                log_info(f"📤 First chunk - size: {chunk_size} bytes")
-                if audio_chunk.startswith(b'\x1a\x45\xdf\xa3'):
-                    log_info("✅ Valid WEBM header detected")
-                else:
-                    hex_preview = audio_chunk[:20].hex()
-                    log_warning(f"⚠️ Unexpected audio format. First 20 bytes: {hex_preview}")
-            # Try to measure audio level (if it's raw PCM)
-            try:
-                if encoding_str == "LINEAR16":  # Only for raw PCM
-                    audio_array = np.frombuffer(audio_chunk, dtype=np.int16)
-                    if len(audio_array) > 0:
-                        rms = np.sqrt(np.mean(audio_array.astype(float) ** 2))
-                        db = 20 * np.log10(max(rms, 1) / 32768.0)
-                        if self.chunk_count % 50 == 0:
-                            log_info(f"🔊 Audio level: {db:.1f} dB")
-            except:
-                pass
-            # Put chunk in queue
-            self.audio_queue.put(audio_chunk)
-            self.chunk_count += 1
-            self.total_bytes += chunk_size
-            # Log progress
-            if self.chunk_count % 50 == 0:
-                log_info(f"📤 Progress: {self.chunk_count} chunks, {self.total_bytes/1024:.1f}KB total")
-            # Check for responses
-            timeout = 0.1
-            end_time = time.time() + timeout
-            while time.time() < end_time:
-                try:
-                    result = self.responses_queue.get_nowait()
-                    log_info(f"🎯 Got result from queue: is_final={result.is_final}, text='{result.text[:30]}...'")
-                    yield result
-                except queue.Empty:
-                    await asyncio.sleep(0.01)
-                except Exception as e:
-                    log_error(f"Error getting result from queue: {e}")
-                    break
-        except Exception as e:
-            log_error(f"❌ Error in stream_audio: {e}")
-            raise
-    async def stop_streaming(self) -> Optional[TranscriptionResult]:
-        """Stop streaming and clean up all resources"""
-        if not self.is_streaming and not self.stream_thread:
-            log_debug("Already stopped, nothing to do")
-            return None
-        try:
-            log_info(f"🛑 Stopping Google STT streaming session #{self.session_id}")
-            # Set flags
-            self.is_streaming = False
-            self.should_stop = True
-            self.stop_event.set()
-            # Send poison pill
-            if self.audio_queue:
-                try:
-                    self.audio_queue.put(None)
-                except:
-                    pass
-            # Wait for thread
-            if self.stream_thread and self.stream_thread.is_alive():
-                log_info("⏳ Waiting for stream thread to finish...")
-                self.stream_thread.join(timeout=5.0)
-                if self.stream_thread.is_alive():
-                    log_warning("⚠️ STT thread did not stop gracefully after 5s")
-                else:
-                    log_info("✅ Stream thread finished")
-            # Get final result
-            final_result = None
-            if self.responses_queue:
-                while not self.responses_queue.empty():
-                    try:
-                        result = self.responses_queue.get_nowait()
-                        if result.is_final:
-                            final_result = result
-                    except queue.Empty:
-                        break
-            # Close client
-            if self.client:
-                try:
-                    if hasattr(self.client, 'transport') and hasattr(self.client.transport, 'close'):
-                        self.client.transport.close()
-                        log_debug("✅ Client transport closed")
-                    if hasattr(self.client, '_transport') and hasattr(self.client._transport, '_grpc_channel'):
-                        self.client._transport._grpc_channel.close()
-                        log_debug("✅ gRPC channel closed")
-                except Exception as e:
-                    log_warning(f"⚠️ Error closing Google client: {e}")
-                finally:
-                    self.client = None
-            # Reset state
-            self.audio_queue = None
-            self.responses_queue = None
-            self.stream_thread = None
-            self.streaming_config = None
-            self.stop_event.clear()
-            log_info(f"✅ Google STT streaming session #{self.session_id} stopped and cleaned")
-            return final_result
-        except Exception as e:
-            log_error(f"❌ Error during stop_streaming", error=str(e))
-            self.is_streaming = False
-            self.stream_thread = None
-            self.client = None
-            self.streaming_config = None
-            self.stop_event.clear()
-            self.audio_queue = None
-            self.responses_queue = None
-            return None
-    def supports_realtime(self) -> bool:
-        """Google Cloud STT supports real-time streaming"""
-        return True
-    def get_supported_languages(self) -> List[str]:
-        """Get list of supported language codes"""
-        return [
-            "tr-TR", "en-US", "en-GB", "de-DE", "fr-FR", "es-ES",
-            "it-IT", "pt-BR", "ru-RU", "ja-JP", "ko-KR", "zh-CN", "ar-SA"
-        ]
-    def get_provider_name(self) -> str:
-        """Get provider name"""
-        return "google"
-    def _reset_session(self):
-        """Reset session data"""
-        # Clear queues
-        while not self.audio_queue.empty():
-            try:
-                self.audio_queue.get_nowait()
-            except queue.Empty:
-                break
-        while not self.responses_queue.empty():
-            try:
-                self.responses_queue.get_nowait()
-            except queue.Empty:
-                break
-        # Reset state
-        self.should_stop = False
-        self.error_message = None
-        self.session_id += 1
-        self.stream_start_time = time.time()
-        self.chunk_count = 0
-        self.total_bytes = 0
-        log_info(f"🔄 Google STT session data reset. New session ID: {self.session_id}")
-        # Create fresh queues
-        self.audio_queue = queue.Queue()
-        self.responses_queue = queue.Queue()
-        log_debug("✅ Created fresh queues")
-    def _create_fresh_queues(self):
-        """Create fresh queue instances"""
-        if self.audio_queue:
-            while not self.audio_queue.empty():
-                try:
-                    self.audio_queue.get_nowait()
-                except:
-                    pass
-        if self.responses_queue:
-            while not self.responses_queue.empty():
-                try:
-                    self.responses_queue.get_nowait()
-                except:
-                    pass
-        self.audio_queue = queue.Queue(maxsize=1000)
-        self.responses_queue = queue.Queue(maxsize=100)
-        log_debug("✅ Created fresh queues")
-    def _request_generator(self):
-        """Generate requests for the streaming recognize API"""
-        # First request with config
-        yield speech.StreamingRecognizeRequest(streaming_config=self.streaming_config)
-        # Audio chunks
-        while not self.should_stop:
-            try:
-                audio_chunk = self.audio_queue.get(timeout=0.1)
-                if audio_chunk is None:
-                    log_info("📛 Poison pill received, stopping request generator")
-                    break
-                yield speech.StreamingRecognizeRequest(audio_content=audio_chunk)
-            except queue.Empty:
-                continue
-            except Exception as e:
-                log_error(f"Error in request generator: {e}")
-                break
-        log_info(f"📊 Request generator finished. Total chunks: {self.chunk_count}, Total bytes: {self.total_bytes}")
-    async def start_streaming(self, config: STTConfig) -> None:
-        """Initialize streaming session with clean state"""
-        try:
-            # Thread safety için lock kullan
-            async with asyncio.Lock():
-                # Clean up any existing stream
-                if self.is_streaming or self.stream_thread:
-                    log_warning("⚠️ Previous stream still active, stopping it first")
-                    await self.stop_streaming()
-                    await asyncio.sleep(0.5)
-                # Double-check after cleanup
-                if self.stream_thread and self.stream_thread.is_alive():
-                    log_error(f"❌ Stream thread STILL running after cleanup! Thread: {self.stream_thread.name}")
-                    raise Exception("Failed to stop previous stream thread")
-                # Reset session
-                self._reset_session()
-                self.single_utterance = config.single_utterance
-                self.current_encoding = config.encoding
-                log_info(f"🎤 Starting Google STT streaming session #{self.session_id} with config: {config}")
-                # Create fresh queues
-                self._create_fresh_queues()
-                self.stop_event.clear()
-                self.should_stop = False
-                # Create new client
-                self.client = speech.SpeechClient()
-                log_info("✅ Created new Google Speech client")
-                # Create recognition config
-                recognition_config = speech.RecognitionConfig(
-                    encoding=speech.RecognitionConfig.AudioEncoding.WEBM_OPUS,
-                    sample_rate_hertz=16000,
-                    language_code="tr-TR",
-                    enable_automatic_punctuation=True,
-                    model="latest_long",
-                    use_enhanced=True,
-                    max_alternatives=1,
-                    metadata=speech.RecognitionMetadata(
-                        interaction_type=speech.RecognitionMetadata.InteractionType.VOICE_SEARCH,
-                        microphone_distance=speech.RecognitionMetadata.MicrophoneDistance.NEARFIELD,
-                        recording_device_type=speech.RecognitionMetadata.RecordingDeviceType.PC,
-                    )
-                )
-                # Create streaming config with VAD
-                self.streaming_config = speech.StreamingRecognitionConfig(
-                    config=recognition_config,
-                    interim_results=True,
-                    single_utterance=False,
-                    enable_voice_activity_events=True  # ✅ VAD events enabled
-                )
-                self.is_streaming = True
-                self.stop_event.clear()
-                # Thread başlatmadan önce son kontrol
-                if self.stream_thread is not None:
-                    log_error("❌ stream_thread should be None at this point!")
-                    self.stream_thread = None
-                self.is_streaming = True
-                # Start streaming thread with unique ID
-                thread_id = f"GoogleSTT-Session-{self.session_id}-{int(time.time()*1000)}"
-                self.stream_thread = threading.Thread(
-                    target=self._run_stream,
-                    name=thread_id
-                )
-                self.stream_thread.daemon = True
-                log_info(f"🚀 Starting thread: {thread_id}")
-                self.stream_thread.start()
-                log_info(f"✅ Google STT streaming session #{self.session_id} started successfully")
-        except Exception as e:
-            log_error(f"❌ Failed to start Google STT streaming", error=str(e))
-            self.is_streaming = False
-            self.client = None
-            self._create_fresh_queues()
-            raise
-    def _run_stream(self):
-        """Run the streaming recognition loop in a separate thread"""
-        try:
-            thread_id = threading.current_thread().ident
-            log_info(f"🎤 Google STT stream thread started - Thread ID: {thread_id}, Session: {self.session_id}")
-            # Create request generator
-            requests = self._request_generator()
-            # Create streaming client
-            log_info(f"🎤 Creating Google STT streaming client... Thread ID: {thread_id}")
-            # Get responses (no timeout parameter!)
-            responses = self.client.streaming_recognize(requests)
-            # Track responses
-            first_response_time = None
-            response_count = 0
-            # Process responses
-            for response in responses:
-                if self.should_stop:
-                    log_info("🛑 Stop flag detected, ending stream")
-                    break
-                response_count += 1
-                if first_response_time is None:
-                    first_response_time = time.time()
-                    elapsed = first_response_time - self.stream_start_time
-                    log_info(f"🎉 FIRST RESPONSE from Google STT after {elapsed:.2f}s")
-                # Check for VAD events
-                if hasattr(response, 'speech_event_type') and response.speech_event_type:
-                    event_type = response.speech_event_type
-                    log_info(f"🎙️ VAD Event: {event_type}")
-                    if event_type == speech.StreamingRecognizeResponse.SpeechEventType.END_OF_SINGLE_UTTERANCE:
-                        log_info("🔚 End of utterance detected by VAD")
-                # Log response
-                has_results = len(response.results) > 0 if hasattr(response, 'results') else False
-                log_info(f"📨 Google STT Response #{response_count}: has_results={has_results}")
-                if not response.results:
-                    continue
-                # Process results
-                for result_idx, result in enumerate(response.results):
-                    # Check result type
-                    result_type = "🔄 INTERIM" if not result.is_final else "✅ FINAL"
-                    stability = getattr(result, 'stability', 0.0)
-                    log_info(f"{result_type} Result #{result_idx}: "
-                            f"alternatives={len(result.alternatives)}, "
-                            f"stability={stability:.3f}")
-                    if result.alternatives:
-                        best_alternative = result.alternatives[0]
-                        transcript = best_alternative.transcript
-                        confidence = best_alternative.confidence if result.is_final else stability
-                        # Log transcript
-                        if result.is_final:
-                            log_info(f"✅ FINAL TRANSCRIPT: '{transcript}' "
-                                    f"(confidence: {confidence:.3f})")
-                        else:
-                            log_info(f"🔄 INTERIM TRANSCRIPT: '{transcript[:100]}...' "
-                                    f"(stability: {stability:.3f})")
-                        # Queue result
-                        result_obj = TranscriptionResult(
-                            text=transcript,
-                            is_final=result.is_final,
-                            confidence=confidence,
-                            timestamp=datetime.utcnow()
-                        )
-                        self.responses_queue.put(result_obj)
-                        log_info(f"📥 {'FINAL' if result.is_final else 'INTERIM'} result queued")
-            # Log completion
-            if response_count == 0:
-                log_error("❌ Google STT stream ended without ANY responses!")
-            else:
-                log_info(f"✅ Google STT stream ended normally after {response_count} responses")
-        except Exception as e:
-            log_error(f"❌ Google STT error: {e}")
-            if hasattr(e, 'details'):
-                log_error(f"Error details: {e.details}")
-            self.error_message = str(e)
-        finally:
-            log_info("🎤 Google STT stream thread ended")
-            with self.lock:
-                self.is_streaming = False

+"""
+Google Cloud Speech-to-Text Implementation
+"""
+import asyncio
+from typing import AsyncIterator, Optional, List, Any
+from datetime import datetime
+import queue
+import threading
+import traceback
+import os
+from google.cloud import speech
+from google.cloud.speech import RecognitionConfig, StreamingRecognitionConfig
+import google.auth
+from utils.logger import log_info, log_error, log_debug, log_warning
+from .stt_interface import STTInterface, STTConfig, TranscriptionResult
+class GoogleSTT(STTInterface):
+"""Google Cloud Speech-to-Text implementation of STTInterface.
+
+Audio chunks passed to stream_audio() are placed on a queue that a background
+thread feeds to the Speech client; transcripts come back through a second queue.
+"""
+def __init__(self, credentials_path: Optional[str] = None):
+    """
+    Create the Speech client and put the provider into its idle state.
+    Args:
+        credentials_path: Service account JSON file used to build the client.
+            When it is omitted the client is created with default credentials.
+    Raises:
+        Exception: Any failure during setup is logged and re-raised unchanged.
+    """
+    try:
+        if not credentials_path:
+            # No key file given: let the client library resolve default credentials (ADC)
+            self.client = speech.SpeechClient()
+            log_info("✅ Google STT initialized with default credentials")
+        else:
+            self.client = speech.SpeechClient.from_service_account_file(credentials_path)
+            log_info(f"✅ Google STT initialized with service account: {credentials_path}")
+        # Worker thread handle and the event used to ask it to stop
+        self.stop_event = threading.Event()
+        self.stream_thread = None
+        # Per-session counters
+        self.session_id = 0
+        self.total_chunks = 0
+        self.total_audio_bytes = 0
+        # Queues shared with the worker thread: audio in, results out (bounded)
+        self.audio_queue = queue.Queue()
+        self.results_queue = queue.Queue(maxsize=100)
+        # Nothing is streaming until start_streaming() is called
+        self.is_streaming = False
+        self.audio_generator = None
+        self.responses_stream = None
+    except Exception as e:
+        log_error(f"❌ Failed to initialize Google STT: {str(e)}")
+        raise
+def _map_language_code(self, language: str) -> str:
+    """Return the language code to send to Google for ``language``.
+
+    Google uses BCP-47 codes. Every code listed here maps to itself, and a
+    code that is not listed is returned unchanged.
+    """
+    known_codes = (
+        "tr-TR", "en-US", "en-GB", "de-DE", "fr-FR", "es-ES", "it-IT",
+        "pt-BR", "ru-RU", "ja-JP", "ko-KR", "zh-CN", "ar-SA",
+    )
+    google_codes = {code: code for code in known_codes}
+    return google_codes.get(language, language)
+async def start_streaming(self, config: STTConfig) -> None:
+    """Start a new streaming recognition session.
+
+    Stops a session that is still flagged as active, resets the per-session
+    data, builds the recognition and streaming configs, and launches the
+    background thread that runs ``_stream_recognition``.
+
+    Args:
+        config: Session settings. Only ``config.language`` is read here;
+            encoding (LINEAR16) and sample rate (16000 Hz) are fixed.
+    Raises:
+        Exception: Any failure is logged, ``is_streaming`` is cleared and the
+            original exception is re-raised.
+    """
+    try:
+        # Stop any existing stream
+        if self.is_streaming:
+            log_warning("⚠️ Previous stream still active, stopping it first")
+            await self.stop_streaming()
+            await asyncio.sleep(0.5)
+        # Reset session data
+        self._reset_session_data()
+        log_info(f"🎤 Starting Google STT - Session #{self.session_id}")
+        # Configure recognition settings
+        language_code = self._map_language_code(config.language)
+        recognition_config = RecognitionConfig(
+            encoding=RecognitionConfig.AudioEncoding.LINEAR16,
+            sample_rate_hertz=16000,
+            language_code=language_code,
+            # Settings suited to single-utterance recognition
+            enable_automatic_punctuation=True,
+            # Model selection - latest_long
+            model="latest_long",
+            # Ask for the enhanced model variant
+            use_enhanced=True,
+            # Single channel audio
+            audio_channel_count=1,
+            speech_contexts=[
+                speech.SpeechContext(
+                    phrases=[],  # Phrase list left empty on purpose; the context is still sent
+                    boost=20.0
+                )
+            ],
+            # Only the top alternative is requested
+            max_alternatives=1,
+            # Profanity filter disabled
+            profanity_filter=False,
+            # Speaker diarization is left at its default (off). The former
+            # ``enable_speaker_diarization=False`` keyword was dropped:
+            # NOTE(review): the v1 RecognitionConfig appears to expose
+            # diarization only through ``diarization_config`` - confirm.
+            # Word level confidence
+            enable_word_confidence=False,
+            enable_spoken_punctuation=False,
+            enable_spoken_emojis=False,
+        )
+        # Streaming config: one utterance per session, final results only
+        self.streaming_config = StreamingRecognitionConfig(
+            config=recognition_config,
+            # Single utterance mode
+            single_utterance=True,
+            # No interim results - only final
+            interim_results=False
+        )
+        log_info(f"🔧 Google STT config: language={language_code}, "
+                f"model=latest_long, enhanced=True, "
+                f"single_utterance=True, interim_results=False")
+        # Start streaming in background thread
+        self.stop_event.clear()
+        self.stream_thread = threading.Thread(
+            target=self._stream_recognition,
+            daemon=True
+        )
+        # Set the flag BEFORE starting the worker. The worker clears it in its
+        # finally-block; setting it afterwards could overwrite that and leave
+        # the session flagged active with a dead thread.
+        self.is_streaming = True
+        self.stream_thread.start()
+        log_info("✅ Google STT started - Ready for speech")
+    except Exception as e:
+        log_error(f"❌ Failed to start Google STT", error=str(e))
+        self.is_streaming = False
+        raise
+def _stream_recognition(self):
+    """Run one streaming recognition call; target of the background thread.
+
+    Feeds the audio produced by ``_audio_generator`` to
+    ``client.streaming_recognize`` and converts every non-empty transcript
+    into a ``TranscriptionResult`` on ``results_queue``.
+
+    The results queue is bounded, so results are added without blocking:
+    when it is full the result is dropped with a warning instead of stalling
+    this thread. On an unexpected error (one not caused by a requested stop)
+    an empty final result is queued. ``is_streaming`` is always cleared when
+    the thread ends.
+    """
+    try:
+        log_debug("🎙️ Starting recognition stream thread")
+        # Create audio generator
+        audio_generator = self._audio_generator()
+        # Start streaming recognition
+        responses = self.client.streaming_recognize(
+            self.streaming_config,
+            audio_generator
+        )
+        end_of_utterance = speech.StreamingRecognizeResponse.SpeechEventType.END_OF_SINGLE_UTTERANCE
+        # Process responses
+        for response in responses:
+            if self.stop_event.is_set():
+                break
+            # Check the event before skipping result-less responses, since an
+            # event-only response has no results. Do not leave the loop here:
+            # results may still follow the event, so keep reading until the
+            # server closes the stream.
+            if getattr(response, 'speech_event_type', None) == end_of_utterance:
+                log_info("🔚 End of single utterance detected")
+            if not response.results:
+                continue
+            # Process each result
+            for result in response.results:
+                if not result.alternatives:
+                    continue
+                # Get best alternative
+                alternative = result.alternatives[0]
+                # Only process if we have transcript
+                if not alternative.transcript:
+                    continue
+                transcription_result = TranscriptionResult(
+                    text=alternative.transcript,
+                    is_final=result.is_final,
+                    confidence=alternative.confidence,
+                    timestamp=datetime.now().timestamp()
+                )
+                try:
+                    # put_nowait: a plain put() would block on the full bounded
+                    # queue and never raise queue.Full
+                    self.results_queue.put_nowait(transcription_result)
+                except queue.Full:
+                    log_warning("⚠️ Results queue full")
+                    continue
+                if result.is_final:
+                    log_info(f"🎯 FINAL TRANSCRIPT: '{alternative.transcript}' "
+                           f"(confidence: {alternative.confidence:.2f})")
+                    # Remaining results of this response are skipped
+                    break
+                # Not expected while interim_results=False
+                log_debug(f"📝 Transcript: '{alternative.transcript}'")
+    except Exception as e:
+        if not self.stop_event.is_set():
+            log_error(f"❌ Recognition stream error: {str(e)}")
+            # Signal the failure to the consumer with an empty final result
+            error_result = TranscriptionResult(
+                text="",
+                is_final=True,
+                confidence=0.0,
+                timestamp=datetime.now().timestamp()
+            )
+            try:
+                self.results_queue.put_nowait(error_result)
+            except queue.Full:
+                log_warning("⚠️ Results queue full")
+    finally:
+        log_debug("🎙️ Recognition stream thread ended")
+        self.is_streaming = False
+def _audio_generator(self):
+    """Yield queued audio as streaming requests until asked to stop.
+
+    Polls ``audio_queue`` every 0.1 s so that ``stop_event`` is noticed even
+    when no audio arrives. A ``None`` item is the sentinel that ends the
+    stream. Each chunk is wrapped in a ``StreamingRecognizeRequest`` because
+    ``streaming_recognize(config, requests)`` expects request messages, not
+    raw bytes.
+
+    Yields:
+        speech.StreamingRecognizeRequest: One request per audio chunk.
+    """
+    while not self.stop_event.is_set():
+        try:
+            # Get audio chunk with timeout
+            chunk = self.audio_queue.get(timeout=0.1)
+        except queue.Empty:
+            continue
+        except Exception as e:
+            log_error(f"❌ Audio generator error: {str(e)}")
+            break
+        if chunk is None:  # Sentinel value
+            break
+        yield speech.StreamingRecognizeRequest(audio_content=chunk)
+async def stream_audio(self, audio_chunk: bytes) -> AsyncIterator[TranscriptionResult]:
+    """Queue one audio chunk and yield every result that is already available.
+
+    Args:
+        audio_chunk: Audio bytes handed to the background thread.
+    Yields:
+        TranscriptionResult: Results taken from the results queue without
+            waiting. Once a final result has been yielded, ``is_streaming``
+            is cleared.
+    Raises:
+        RuntimeError: If no session is active.
+        Exception: Any other failure is logged, ``is_streaming`` is cleared
+            and the error is re-raised.
+    """
+    if not self.is_streaming:
+        raise RuntimeError("Streaming not started. Call start_streaming() first.")
+    try:
+        # Hand the chunk to the worker thread, then update the session counters
+        self.audio_queue.put(audio_chunk)
+        self.total_chunks += 1
+        self.total_audio_bytes += len(audio_chunk)
+        # Progress is reported every 50 chunks
+        if self.total_chunks % 50 == 0:
+            log_debug(f"📊 Processing... {self.total_chunks} chunks, {self.total_audio_bytes/1024:.1f}KB")
+        # Drain the results queue without blocking
+        queue_drained = False
+        while not queue_drained:
+            try:
+                ready = self.results_queue.get_nowait()
+                log_debug(f"🎯 Yielding result: is_final={ready.is_final}, text='{ready.text}'")
+                yield ready
+                # A final result marks the end of the session
+                if ready.is_final:
+                    self.is_streaming = False
+            except queue.Empty:
+                queue_drained = True
+    except Exception as e:
+        log_error(f"❌ Error streaming audio", error=str(e))
+        self.is_streaming = False
+        raise
+async def stop_streaming(self) -> Optional[TranscriptionResult]:
+    """Stop the session and return the last final transcript, if any.
+
+    The background thread clears ``is_streaming`` itself when the stream
+    ends, so that flag alone cannot decide whether there is work left.
+    The call is a no-op only when the flag is clear, the thread is not
+    running and no results are waiting; otherwise a queued final transcript
+    would be lost.
+
+    Returns:
+        The most recent queued result that is final and has text, or None
+        when there is none or when stopping failed.
+    """
+    worker = self.stream_thread
+    worker_alive = worker is not None and worker.is_alive()
+    if not self.is_streaming and not worker_alive and self.results_queue.empty():
+        log_debug("Already stopped, nothing to do")
+        return None
+    try:
+        log_info(f"🛑 Stopping Google STT session #{self.session_id}")
+        self.is_streaming = False
+        # Signal stop
+        self.stop_event.set()
+        # Send sentinel to audio queue
+        self.audio_queue.put(None)
+        # Give the thread up to 2 seconds to finish
+        if worker_alive:
+            worker.join(timeout=2.0)
+        # Drain the results queue, keeping the last final result with text
+        final_result = None
+        while True:
+            try:
+                result = self.results_queue.get_nowait()
+            except queue.Empty:
+                break
+            if result.is_final and result.text:
+                final_result = result
+        log_info(f"✅ Google STT session #{self.session_id} stopped")
+        return final_result
+    except Exception as e:
+        log_error(f"❌ Error during stop_streaming", error=str(e))
+        self.is_streaming = False
+        return None
+def _reset_session_data(self):
+    """Empty both queues, zero the counters and advance the session id."""
+    # Drain with get_nowait() until queue.Empty. Only that exception is
+    # expected here; a bare ``except`` would also hide real errors and
+    # interrupts.
+    for pending in (self.audio_queue, self.results_queue):
+        while True:
+            try:
+                pending.get_nowait()
+            except queue.Empty:
+                break
+    # Reset counters
+    self.total_audio_bytes = 0
+    self.total_chunks = 0
+    self.session_id += 1
+    log_debug(f"🔄 Session data reset. New session ID: {self.session_id}")
+def supports_realtime(self) -> bool:
+    """Report that this provider supports real-time streaming."""
+    realtime_capable = True
+    return realtime_capable
+def get_supported_languages(self) -> List[str]:
+    """Return the language codes this provider advertises.
+
+    This is a partial list of what Google Cloud Speech-to-Text supports.
+    Full list: https://cloud.google.com/speech-to-text/docs/languages
+    """
+    advertised = (
+        "tr-TR en-US en-GB en-AU en-CA en-IN "
+        "es-ES es-MX es-AR fr-FR fr-CA de-DE "
+        "it-IT pt-BR pt-PT ru-RU ja-JP ko-KR "
+        "zh-CN zh-TW ar-SA ar-EG hi-IN nl-NL "
+        "pl-PL sv-SE da-DK no-NO fi-FI el-GR "
+        "he-IL th-TH vi-VN id-ID ms-MY fil-PH"
+    )
+    # split() builds a new list on every call
+    return advertised.split()
+def get_provider_name(self) -> str:
+    """Return the identifier of this STT provider."""
+    provider_name = "google"
+    return provider_name