Spaces:

iajitpanday
/

vBot-1.5

Sleeping

App Files Files Community

iajitpanday committed on May 9, 2025

Commit

fba9de8

verified ·

1 Parent(s): de01ccf

Create utils.py

Browse files

Files changed (1) hide show

utils.py +176 -0

utils.py ADDED Viewed

	@@ -0,0 +1,176 @@

+"""
+Utility functions for the AI call assistant system.
+"""
+import os
+import requests
+import json
+import random
+import tempfile
+import logging
+from pydub import AudioSegment
+import io
+import base64
+from transformers import pipeline
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+# Initialize HF API token (get this from your HF account)
+# Read from the HF_API_TOKEN environment variable; falls back to "" when unset.
+HF_API_TOKEN = os.environ.get("HF_API_TOKEN", "")
+# Initialize intent classifier
+# The zero-shot pipeline is constructed at import time. If construction raises,
+# the error is logged and intent_classifier is set to None; classify_intent
+# checks for that and returns ("other", 0.0) instead of classifying.
+try:
+    intent_classifier = pipeline(
+        "zero-shot-classification",
+        model="facebook/bart-large-mnli",
+    )
+except Exception as e:
+    logger.error(f"Error loading intent classifier: {e}")
+    intent_classifier = None
+# Possible intents
+# Passed as candidate_labels to the zero-shot classifier in classify_intent;
+# "other" is also the value classify_intent returns when it cannot classify.
+POSSIBLE_INTENTS = [
+    "product_inquiry",
+    "technical_support",
+    "billing_question",
+    "general_information",
+    "appointment_scheduling",
+    "complaint",
+    "other"
+]
+# Fallback responses
+# get_fallback_response picks one of these at random.
+FALLBACK_RESPONSES = [
+    "I apologize, but I didn't quite understand that. Could you please repeat your question?",
+    "Thank you for your call. I'll make sure someone gets back to you with the information you need.",
+    "I'm having trouble processing your request. Let me transfer your information to our team who will get back to you shortly.",
+    "I've recorded your message and will have someone contact you as soon as possible.",
+    "Thank you for reaching out. I'll make sure your inquiry is addressed by the appropriate team member."
+]
+def transcribe_audio(audio_url):
+    """
+    Transcribe audio using OpenAI Whisper model from Hugging Face
+    """
+    try:
+        # Download audio from Twilio URL
+        response = requests.get(audio_url)
+        if response.status_code != 200:
+            logger.error(f"Failed to download audio from {audio_url}")
+            return None
+        audio_content = response.content
+        # Convert to format compatible with Whisper
+        audio = AudioSegment.from_file(io.BytesIO(audio_content))
+        audio = audio.set_channels(1).set_frame_rate(16000)
+        # Save temporarily
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
+            temp_filename = temp_audio.name
+            audio.export(temp_filename, format="wav")
+        # Use Hugging Face Whisper API
+        API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
+        headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
+        with open(temp_filename, "rb") as f:
+            audio_data = f.read()
+        response = requests.post(API_URL, headers=headers, data=audio_data)
+        os.unlink(temp_filename)  # Clean up temp file
+        if response.status_code == 200:
+            return response.json().get("text", "")
+        else:
+            logger.error(f"Error from Whisper API: {response.text}")
+            return None
+    except Exception as e:
+        logger.error(f"Error transcribing audio: {e}")
+        return None
+def classify_intent(text):
+    """Classify the intent of the user's message"""
+    if not text or not intent_classifier:
+        return "other", 0.0
+    try:
+        # Use zero-shot classification to determine intent
+        results = intent_classifier(
+            text,
+            candidate_labels=POSSIBLE_INTENTS,
+            hypothesis_template="This is a {} request."
+        )
+        # Get top intent and confidence
+        top_intent = results["labels"][0]
+        confidence = results["scores"][0]
+        return top_intent, confidence
+    except Exception as e:
+        logger.error(f"Error classifying intent: {e}")
+        return "other", 0.0
+def get_rag_response(query, intent, hf_space_url):
+    """Get response using the RAG system via Hugging Face Spaces"""
+    try:
+        # Prepare data for the Hugging Face Space
+        api_url = f"{hf_space_url}/api/predict"
+        payload = {
+            "data": [
+                query,
+                intent
+            ]
+        }
+        # Check if we should use API token
+        headers = {}
+        if HF_API_TOKEN:
+            headers["Authorization"] = f"Bearer {HF_API_TOKEN}"
+        # Call the Hugging Face Space
+        response = requests.post(api_url, json=payload, headers=headers)
+        if response.status_code == 200:
+            result = response.json()
+            # Extract the response text from the result
+            # Structure will depend on your Space's output format
+            response_text = result.get("data", ["I'm sorry, I couldn't process that request."])[0]
+            return response_text
+        else:
+            logger.error(f"Error from HF Space: {response.status_code} - {response.text}")
+            return get_fallback_response()
+    except Exception as e:
+        logger.error(f"Error getting RAG response: {e}")
+        return get_fallback_response()
+def text_to_speech(text):
+    """Convert text response to speech using Hugging Face TTS model"""
+    if not text:
+        return None
+    try:
+        API_URL = "https://api-inference.huggingface.co/models/espnet/kan-bayashi_ljspeech_vits"
+        headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
+        payload = {"inputs": text}
+        response = requests.post(API_URL, headers=headers, json=payload)
+        if response.status_code == 200:
+            # Return audio content in base64 for Twilio
+            audio_content = base64.b64encode(response.content).decode("utf-8")
+            return audio_content
+        else:
+            logger.error(f"Error from TTS API: {response.text}")
+            return None
+    except Exception as e:
+        logger.error(f"Error in text-to-speech: {e}")
+        return None
+def get_fallback_response():
+    """Return a fallback response"""
+    return random.choice(FALLBACK_RESPONSES)