Spaces:
Sleeping
Sleeping
| """ | |
| Utility functions for the AI call assistant system. | |
| """ | |
| import os | |
| import requests | |
| import json | |
| import random | |
| import tempfile | |
| import logging | |
| from pydub import AudioSegment | |
| import io | |
| import base64 | |
| from transformers import pipeline | |
# Module-level logger shared by every helper in this file.
logger = logging.getLogger(__name__)

# Hugging Face API token, read once from the environment at import time.
# An empty string means "no token configured". Surrounding whitespace is
# stripped so a stray newline in the secret cannot end up inside an
# Authorization header.
HF_API_TOKEN = os.environ.get("HF_API_TOKEN", "").strip()
# Build the zero-shot intent classifier once, at import time. When the model
# cannot be loaded the name stays None, which classify_intent() treats as
# "classifier unavailable".
intent_classifier = None
try:
    intent_classifier = pipeline(
        "zero-shot-classification",
        model="facebook/bart-large-mnli",
    )
except Exception as load_error:
    logger.error(f"Error loading intent classifier: {load_error}")
# Candidate labels handed to the zero-shot intent classifier.
# "other" is also the label classify_intent() returns when it cannot classify.
POSSIBLE_INTENTS = [
    "product_inquiry",
    "technical_support",
    "billing_question",
    "general_information",
    "appointment_scheduling",
    "complaint",
    "other",
]
# Canned replies; get_fallback_response() picks one of these at random.
FALLBACK_RESPONSES = [
    (
        "I apologize, but I didn't quite understand that. "
        "Could you please repeat your question?"
    ),
    (
        "Thank you for your call. "
        "I'll make sure someone gets back to you with the information you need."
    ),
    (
        "I'm having trouble processing your request. "
        "Let me transfer your information to our team who will get back to you shortly."
    ),
    (
        "I've recorded your message "
        "and will have someone contact you as soon as possible."
    ),
    (
        "Thank you for reaching out. "
        "I'll make sure your inquiry is addressed by the appropriate team member."
    ),
]
def transcribe_audio(audio_url):
    """Transcribe a remote recording with the hosted Whisper model.

    The audio is downloaded from ``audio_url``, converted to mono 16 kHz WAV
    through a temporary file, and posted to the Hugging Face Inference API
    (``openai/whisper-large-v3``).

    Args:
        audio_url: URL of the recording to download.

    Returns:
        The transcribed text (``""`` when the API reply has no ``"text"``
        field), or ``None`` when the download, conversion or API call fails.
        Failures are logged, never raised.
    """
    temp_filename = None
    try:
        # Download audio from the recording URL. The timeout keeps a stalled
        # connection from blocking the caller indefinitely.
        download = requests.get(audio_url, timeout=30)
        if download.status_code != 200:
            logger.error(f"Failed to download audio from {audio_url}")
            return None

        # Convert to a format compatible with Whisper (mono, 16 kHz)
        audio = AudioSegment.from_file(io.BytesIO(download.content))
        audio = audio.set_channels(1).set_frame_rate(16000)

        # Reserve a temp path, then export after the handle is closed so the
        # path can be reopened on every platform.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
            temp_filename = temp_audio.name
        # export() hands back an open file handle; close it explicitly
        audio.export(temp_filename, format="wav").close()

        with open(temp_filename, "rb") as wav_file:
            audio_data = wav_file.read()

        # Only send credentials when a token is actually configured
        headers = {}
        if HF_API_TOKEN:
            headers["Authorization"] = f"Bearer {HF_API_TOKEN}"

        # Use Hugging Face Whisper API
        api_url = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
        response = requests.post(api_url, headers=headers, data=audio_data, timeout=120)
        if response.status_code != 200:
            logger.error(f"Error from Whisper API: {response.text}")
            return None
        return response.json().get("text", "")
    except Exception as e:
        logger.error(f"Error transcribing audio: {e}")
        return None
    finally:
        # Clean up the temp file on every path, including failures
        if temp_filename is not None:
            try:
                os.unlink(temp_filename)
            except OSError as cleanup_error:
                logger.warning(f"Could not remove temp file {temp_filename}: {cleanup_error}")
def classify_intent(text):
    """Classify the intent of the user's message.

    Args:
        text: The user's message. Empty, whitespace-only or non-string
            values are not sent to the classifier.

    Returns:
        A ``(intent, confidence)`` tuple. ``intent`` is the top-ranked label
        from ``POSSIBLE_INTENTS`` and ``confidence`` its score as a float.
        ``("other", 0.0)`` is returned when there is nothing to classify, the
        classifier failed to load, or classification raises.
    """
    # Validate the text first: blank input carries no intent, and scoring it
    # would only yield an arbitrary label.
    if not isinstance(text, str) or not text.strip():
        return "other", 0.0
    if not intent_classifier:
        return "other", 0.0

    try:
        # Use zero-shot classification to determine intent
        results = intent_classifier(
            text,
            candidate_labels=POSSIBLE_INTENTS,
            hypothesis_template="This is a {} request.",
        )
        # Labels and scores arrive ranked, best match first
        top_intent = results["labels"][0]
        confidence = float(results["scores"][0])
        return top_intent, confidence
    except Exception as e:
        logger.error(f"Error classifying intent: {e}")
        return "other", 0.0
# NOTE: a second, byte-for-byte copy of get_rag_response() used to sit here.
# It was dead code: the definition after get_fallback_response() rebinds the
# name at import time, so that later definition is the one callers get.
def text_to_speech(text):
    """Convert a text response to speech using a Hugging Face TTS model.

    Args:
        text: The text to synthesise. Empty or whitespace-only text is not
            sent to the API.

    Returns:
        The audio returned by the API, base64-encoded as a ``str``, or
        ``None`` when there is no text or the API call fails. Failures are
        logged, never raised.
    """
    if not text or not str(text).strip():
        return None

    try:
        api_url = "https://api-inference.huggingface.co/models/espnet/kan-bayashi_ljspeech_vits"

        # Only send credentials when a token is actually configured
        headers = {}
        if HF_API_TOKEN:
            headers["Authorization"] = f"Bearer {HF_API_TOKEN}"

        # The timeout keeps a stalled inference call from blocking the caller
        response = requests.post(
            api_url,
            headers=headers,
            json={"inputs": text},
            timeout=60,
        )
        if response.status_code != 200:
            logger.error(f"Error from TTS API: {response.text}")
            return None

        # Return audio content in base64 for Twilio
        return base64.b64encode(response.content).decode("utf-8")
    except Exception as e:
        logger.error(f"Error in text-to-speech: {e}")
        return None
def get_fallback_response():
    """Pick one of the canned fallback replies at random."""
    canned_reply = random.choice(FALLBACK_RESPONSES)
    return canned_reply
def get_rag_response(query, intent, hf_space_url):
    """Get a response from the RAG system hosted on a Hugging Face Space.

    Args:
        query: The user's question, sent as the first input.
        intent: The intent label, sent as the second input.
        hf_space_url: Base URL of the Space; ``/api/predict`` is appended
            (a trailing slash on the base URL is tolerated).

    Returns:
        The first element of the ``"data"`` list in the Space's reply. A
        fixed apology is returned when the reply has no ``"data"`` field, and
        a random fallback response when the request fails, the Space answers
        with a non-200 status, or ``"data"`` is empty or not a list.
    """
    try:
        # Prepare data for the Hugging Face Space
        api_url = f"{hf_space_url.rstrip('/')}/api/predict"
        payload = {"data": [query, intent]}

        # Only send credentials when a token is actually configured
        headers = {}
        if HF_API_TOKEN:
            headers["Authorization"] = f"Bearer {HF_API_TOKEN}"

        # Call the Hugging Face Space; the timeout keeps a stalled Space from
        # blocking the caller indefinitely.
        response = requests.post(api_url, json=payload, headers=headers, timeout=60)
        if response.status_code != 200:
            logger.error(f"Error from HF Space: {response.status_code} - {response.text}")
            return get_fallback_response()

        # Extract the response text from the result.
        # Structure will depend on your Space's output format.
        result = response.json()
        data = result.get("data", ["I'm sorry, I couldn't process that request."])
        # Indexing a string would return one character and indexing an empty
        # list would raise, so both cases go to the fallback explicitly.
        if not isinstance(data, (list, tuple)) or not data:
            logger.error("Error from HF Space: reply has no usable 'data' entries")
            return get_fallback_response()
        return data[0]
    except Exception as e:
        logger.error(f"Error getting RAG response: {e}")
        return get_fallback_response()