Spaces:
Runtime error
Runtime error
| """ | |
| Voice Processing Service for the AI Chatbot with Reusable Intelligence | |
| Handles the cleaning and intent extraction from raw voice-to-text strings | |
| """ | |
| import asyncio | |
| import json | |
| from typing import Dict, Optional, Tuple | |
| from dataclasses import dataclass | |
| import uuid | |
| from datetime import datetime | |
@dataclass
class VoiceProcessingResult:
    """Result of processing one voice-to-text transcript.

    Attributes:
        cleaned_text: Transcript after filler-word removal and normalization.
        extracted_intent: Name of the detected intent, or "unknown".
        confidence_score: Confidence for the detected intent, from 0.0 to 1.0.
        processing_time: Wall-clock processing duration in seconds.
        original_audio_path: Path of the source audio, when the caller
            supplied one.
    """

    cleaned_text: str
    extracted_intent: str
    confidence_score: float
    processing_time: float
    original_audio_path: Optional[str] = None
class VoiceProcessingService:
    """Clean raw voice-to-text strings and classify the speaker's intent.

    Processing is rule-based: filler words are stripped from the transcript
    and the intent is chosen by keyword/phrase scoring.
    """

    # Filler words/phrases mapped to their replacement text. They are only
    # replaced when they occur as whole words, so removing "so" can never
    # damage a word such as "also" or "something".
    _FILLER_REPLACEMENTS = {
        "umm": "",
        "uh": "",
        "uhh": "",
        "ah": "",
        "like": "",
        "you know": "",
        "right": "",
        "okay": "",
        "so": "",
    }

    # Intent name -> single keywords (1 point each) and more specific
    # phrases (2 points each). Dict order is the tie-break order.
    _INTENTS = {
        "task_add": {
            "keywords": ["add", "create", "make", "new", "task", "kam", "bnao", "shamil"],
            "confidence_boost_keywords": ["add task", "create task", "kam shamil"],
        },
        "task_list": {
            "keywords": ["list", "show", "display", "dikhao", "list karo", "kya hai"],
            "confidence_boost_keywords": ["show tasks", "list tasks", "kam dikhao"],
        },
        "task_complete": {
            "keywords": ["complete", "done", "finish", "hogaya", "ho gaya", "khatam"],
            "confidence_boost_keywords": ["mark done", "complete task", "kam khatam"],
        },
        "task_delete": {
            # NOTE: "delete" is listed twice, so a match counts double.
            "keywords": ["delete", "remove", "delete", "hatado", "nikalo", "khatam"],
            "confidence_boost_keywords": ["delete task", "remove task", "kam hatao"],
        },
        "greeting": {
            "keywords": ["hello", "hi", "hey", "helo", "kese ho", "kaia hal", "assalam"],
            "confidence_boost_keywords": ["hello there", "hi there", "helo"],
        },
        "question": {
            "keywords": ["what", "how", "why", "kya", "kese", "kyun", "kaia"],
            "confidence_boost_keywords": ["what is", "how to", "kya hai", "kese"],
        },
        "affirmation": {
            "keywords": ["yes", "yeah", "sure", "jeee", "haan", "jaroor", "ji"],
            "confidence_boost_keywords": ["yes please", "sure thing", "haan ji"],
        },
        "negation": {
            "keywords": ["no", "nope", "nahi", "mat", "mtlb", "nahe", "nai"],
            "confidence_boost_keywords": ["no thanks", "no please", "nahi chahiye"],
        },
    }

    # Transcripts that consist solely of one of these words are rejected.
    _INVALID_PHRASES = frozenset(
        {
            "noise",
            "background",
            "static",
            "garbage",
            "unintelligible",
            "inaudible",
            "unclear",
        }
    )

    # Intents scoring below this confidence are reported as "unknown".
    _MIN_CONFIDENCE = 0.1

    def __init__(self):
        # In a real implementation, this would initialize speech recognition
        # models. For now, processing is simulated with simple rules.
        pass

    async def process_voice_input(
        self, raw_text: str, audio_path: Optional[str] = None
    ) -> "VoiceProcessingResult":
        """Clean a raw transcript and extract its intent.

        Args:
            raw_text: Raw voice-to-text output.
            audio_path: Optional path of the source audio; it is passed
                through unchanged to the result.

        Returns:
            A ``VoiceProcessingResult`` holding the cleaned text, the intent,
            its confidence and the elapsed processing time in seconds.
        """
        start_time = datetime.now()

        cleaned_text = await self._clean_text(raw_text)
        extracted_intent, confidence_score = await self._extract_intent(cleaned_text)

        processing_time = (datetime.now() - start_time).total_seconds()

        return VoiceProcessingResult(
            cleaned_text=cleaned_text,
            extracted_intent=extracted_intent,
            confidence_score=confidence_score,
            processing_time=processing_time,
            original_audio_path=audio_path,
        )

    async def _clean_text(self, raw_text: str) -> str:
        """Normalize a raw transcript and strip filler words.

        The text is lower-cased, whitespace is collapsed, filler words are
        removed (whole words only) and the first character is capitalized.

        Args:
            raw_text: Raw voice-to-text output; ``None`` or empty input
                yields an empty string.

        Returns:
            The cleaned transcript.
        """
        import re  # function-scope import; the block does not rely on a module-level one

        # Collapse whitespace first so multi-word fillers such as "you know"
        # still match when the recognizer emitted irregular spacing.
        cleaned = " ".join((raw_text or "").lower().split())

        # Longest fillers first; the \b anchors restrict matches to whole words.
        fillers = sorted(self._FILLER_REPLACEMENTS, key=len, reverse=True)
        pattern = r"\b(?:" + "|".join(re.escape(filler) for filler in fillers) + r")\b"
        cleaned = re.sub(
            pattern,
            lambda match: self._FILLER_REPLACEMENTS[match.group(0)],
            cleaned,
        )

        # Removing words leaves gaps behind; collapse them again.
        cleaned = " ".join(cleaned.split())

        if cleaned:
            cleaned = cleaned[0].upper() + cleaned[1:]
        return cleaned

    async def _extract_intent(self, text: str) -> Tuple[str, float]:
        """Pick the best-matching intent for ``text``.

        Every keyword found as a whole word scores 1 point and every boost
        phrase scores 2 points. The intent with the highest raw score wins
        (the first one defined wins ties). Its confidence is the score
        divided by the number of words, doubled and capped at 1.0.

        Args:
            text: Cleaned transcript.

        Returns:
            ``(intent, confidence)``; ``("unknown", 0.0)`` when nothing
            matches or the confidence is below the minimum threshold.
        """
        import re  # function-scope import; the block does not rely on a module-level one

        # Tokenize on word characters so punctuation cannot block a match,
        # then pad with spaces so " phrase " containment means whole-word
        # (and whole-phrase) matching.
        tokens = re.findall(r"\w+", (text or "").lower())
        if not tokens:
            return "unknown", 0.0
        padded = " " + " ".join(tokens) + " "

        def contains(phrase: str) -> bool:
            return f" {phrase} " in padded

        best_intent = "unknown"
        best_score = 0
        for intent, config in self._INTENTS.items():
            score = sum(1 for keyword in config["keywords"] if contains(keyword))
            score += sum(
                2 for phrase in config["confidence_boost_keywords"] if contains(phrase)
            )
            # Compare raw scores rather than capped confidences: several
            # intents can hit the 1.0 cap, and the raw score still tells
            # which of them matched more strongly.
            if score > best_score:
                best_intent, best_score = intent, score

        # Normalize by the length of the input, boost slightly, cap at 1.0.
        confidence = min((best_score / len(tokens)) * 2, 1.0)
        if confidence < self._MIN_CONFIDENCE:
            return "unknown", 0.0
        return best_intent, confidence

    async def validate_voice_input(self, raw_text: str) -> bool:
        """Report whether a raw transcript is usable.

        Args:
            raw_text: Raw voice-to-text output.

        Returns:
            ``False`` for empty/whitespace-only input, for input that is
            exactly one of the known noise markers, and for input longer
            than 10 characters made of fewer than 3 distinct characters;
            ``True`` otherwise.
        """
        if not raw_text:
            return False

        cleaned = raw_text.strip().lower()
        if not cleaned or cleaned in self._INVALID_PHRASES:
            return False

        # Mostly repeated characters indicate a poor-quality transcription.
        if len(cleaned) > 10 and len(set(cleaned)) < 3:
            return False

        return True
# Shared module-level instance, created once when the module is imported.
voice_processing_service = VoiceProcessingService()
def get_voice_processing_service() -> VoiceProcessingService:
    """Return the module-level ``voice_processing_service`` instance."""
    return voice_processing_service