iajitpanday committed on
Commit
e6d8c7b
·
verified ·
1 Parent(s): 96245ca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +233 -48
app.py CHANGED
@@ -1,60 +1,245 @@
1
  # app.py
2
  import gradio as gr
3
- from twilio.rest import Client
4
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
5
- import whisper
6
- import pyttsx3
7
- import io
8
- import wave
9
-
10
- # Initialize models
11
- tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
12
- model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")
13
- whisper_model = whisper.load_model("base")
 
 
 
14
 
15
- # Initialize Twilio client
16
- account_sid = os.environ["TWILIO_ACCOUNT_SID"]
17
- auth_token = os.environ["TWILIO_AUTH_TOKEN"]
18
- client = Client(account_sid, auth_token)
19
 
20
- def process_voice_call(audio_data):
21
- # 1. Speech to Text using Whisper
22
- audio = whisper.load_audio(audio_data)
23
- result = whisper_model.transcribe(audio)
24
- user_text = result["text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
- # 2. Generate response using LLM
27
- new_user_input_ids = tokenizer.encode(user_text + tokenizer.eos_token,
28
- return_tensors='pt')
29
- chat_history_ids = model.generate(
30
- new_user_input_ids,
31
- max_length=1000,
32
- num_beams=5,
33
- no_repeat_ngram_size=2,
34
- temperature=0.7,
35
- do_sample=True,
36
- top_k=50,
37
- top_p=0.95,
38
- pad_token_id=tokenizer.eos_token_id
39
- )
40
 
41
- response = tokenizer.decode(chat_history_ids[:, new_user_input_ids.shape[-1]:][0],
42
- skip_special_tokens=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
- # 3. Text to Speech
45
- tts_engine = pyttsx3.init()
46
- tts_engine.save_to_file(response, "response.wav")
47
- tts_engine.runAndWait()
 
 
 
 
 
 
 
 
 
48
 
49
- return response, "response.wav"
 
 
 
 
 
 
50
 
51
  # Create Gradio interface
52
- iface = gr.Interface(
53
- fn=process_voice_call,
54
- inputs=gr.Audio(type="filepath"),
55
- outputs=[gr.Textbox(), gr.Audio()],
56
- title="Voice AI Customer Support"
57
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
 
59
  if __name__ == "__main__":
60
- iface.launch()
 
 
1
  # app.py
2
  import gradio as gr
3
+ import torch
4
+ import numpy as np
5
+ from transformers import (
6
+ AutoTokenizer,
7
+ AutoModelForCausalLM,
8
+ AutoProcessor,
9
+ AutoModelForSpeechSeq2Seq,
10
+ pipeline
11
+ )
12
+ from TTS.api import TTS
13
+ import tempfile
14
+ import os
15
+ import json
16
+ import logging
17
 
18
+ # Configure logging
19
+ logging.basicConfig(level=logging.INFO)
20
+ logger = logging.getLogger(__name__)
 
21
 
22
+ class VoiceAIBot:
23
+ def __init__(self):
24
+ # Initialize models
25
+ self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
26
+ logger.info(f"Using device: {self.device}")
27
+
28
+ # Speech Recognition Model (Whisper)
29
+ self.asr_model = pipeline(
30
+ "automatic-speech-recognition",
31
+ model="openai/whisper-base",
32
+ device=self.device
33
+ )
34
+
35
+ # Conversation Model (DialoGPT for customer support)
36
+ self.tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
37
+ self.conversation_model = AutoModelForCausalLM.from_pretrained(
38
+ "microsoft/DialoGPT-medium"
39
+ ).to(self.device)
40
+
41
+ # Text-to-Speech Model
42
+ self.tts = TTS("tts_models/en/ljspeech/tacotron2-DDC")
43
+
44
+ # Customer support knowledge base
45
+ self.knowledge_base = {
46
+ "order status": "I can help you check your order status. Please provide your order number.",
47
+ "return policy": "Our return policy allows returns within 30 days of purchase. Items must be unused and in original packaging.",
48
+ "shipping": "Standard shipping takes 3-5 business days. Express shipping takes 1-2 business days.",
49
+ "payment": "We accept all major credit cards, PayPal, and Apple Pay.",
50
+ "business hours": "We're open Monday-Friday, 9 AM to 6 PM EST.",
51
+ "technical support": "I can help with basic technical issues. For complex problems, I'll connect you with our technical team.",
52
+ }
53
+
54
+ # Conversation history
55
+ self.conversation_history = []
56
+
57
+ def transcribe_audio(self, audio_file):
58
+ """Convert speech to text using Whisper"""
59
+ try:
60
+ result = self.asr_model(audio_file)
61
+ transcription = result["text"]
62
+ logger.info(f"Transcription: {transcription}")
63
+ return transcription
64
+ except Exception as e:
65
+ logger.error(f"Transcription error: {e}")
66
+ return "Sorry, I couldn't understand the audio."
67
+
68
+ def check_knowledge_base(self, user_input):
69
+ """Check if query matches knowledge base"""
70
+ user_input_lower = user_input.lower()
71
+ for keyword, response in self.knowledge_base.items():
72
+ if keyword in user_input_lower:
73
+ return response
74
+ return None
75
+
76
+ def generate_response(self, user_input):
77
+ """Generate AI response based on user input"""
78
+ # First check knowledge base
79
+ kb_response = self.check_knowledge_base(user_input)
80
+ if kb_response:
81
+ return kb_response
82
+
83
+ # If not found in knowledge base, use conversation model
84
+ try:
85
+ # Add current conversation to history
86
+ self.conversation_history.append(user_input)
87
+
88
+ # Prepare input for the model
89
+ input_text = "Customer: " + user_input + " Agent:"
90
+ input_ids = self.tokenizer.encode(input_text, return_tensors="pt").to(self.device)
91
+
92
+ # Generate response
93
+ with torch.no_grad():
94
+ output = self.conversation_model.generate(
95
+ input_ids,
96
+ max_length=150,
97
+ num_beams=5,
98
+ temperature=0.7,
99
+ do_sample=True,
100
+ top_k=50,
101
+ top_p=0.95,
102
+ pad_token_id=self.tokenizer.eos_token_id,
103
+ no_repeat_ngram_size=2
104
+ )
105
+
106
+ response = self.tokenizer.decode(output[0], skip_special_tokens=True)
107
+ # Extract only the agent's response
108
+ agent_response = response.split("Agent:")[-1].strip()
109
+
110
+ # Add to conversation history
111
+ self.conversation_history.append(agent_response)
112
+
113
+ # Keep conversation history manageable
114
+ if len(self.conversation_history) > 10:
115
+ self.conversation_history = self.conversation_history[-10:]
116
+
117
+ logger.info(f"Generated response: {agent_response}")
118
+ return agent_response
119
+
120
+ except Exception as e:
121
+ logger.error(f"Response generation error: {e}")
122
+ return "I'm sorry, I'm having trouble processing your request right now. Can you please try again?"
123
 
124
+ def text_to_speech(self, text):
125
+ """Convert text to speech"""
126
+ try:
127
+ # Create temporary file for audio output
128
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
129
+ self.tts.tts_to_file(text=text, file_path=tmp_file.name)
130
+ return tmp_file.name
131
+ except Exception as e:
132
+ logger.error(f"TTS error: {e}")
133
+ return None
 
 
 
 
134
 
135
+ def process_voice_input(self, audio_file):
136
+ """Process complete voice interaction"""
137
+ if audio_file is None:
138
+ return "Please provide an audio input.", None, self.format_conversation_history()
139
+
140
+ # 1. Transcribe speech to text
141
+ user_text = self.transcribe_audio(audio_file)
142
+
143
+ # 2. Generate AI response
144
+ ai_response = self.generate_response(user_text)
145
+
146
+ # 3. Convert response to speech
147
+ audio_response = self.text_to_speech(ai_response)
148
+
149
+ # 4. Return all outputs
150
+ return user_text, audio_response, self.format_conversation_history()
151
 
152
+ def format_conversation_history(self):
153
+ """Format conversation history for display"""
154
+ if not self.conversation_history:
155
+ return "No conversation history yet."
156
+
157
+ formatted = "Conversation History:\n\n"
158
+ for i in range(0, len(self.conversation_history), 2):
159
+ if i < len(self.conversation_history):
160
+ formatted += f"Customer: {self.conversation_history[i]}\n"
161
+ if i + 1 < len(self.conversation_history):
162
+ formatted += f"Agent: {self.conversation_history[i + 1]}\n\n"
163
+
164
+ return formatted
165
 
166
+ def clear_history(self):
167
+ """Clear conversation history"""
168
+ self.conversation_history = []
169
+ return "Conversation history cleared.", self.format_conversation_history()
170
+
171
+ # Initialize the bot
172
+ bot = VoiceAIBot()
173
 
174
  # Create Gradio interface
175
+ def create_interface():
176
+ with gr.Blocks(title="Voice AI Customer Support Bot") as demo:
177
+ gr.Markdown("# 🎤 Voice AI Customer Support Bot")
178
+ gr.Markdown("Upload audio or record your voice to interact with the AI customer support agent.")
179
+
180
+ with gr.Row():
181
+ with gr.Column(scale=1):
182
+ # Audio input
183
+ audio_input = gr.Audio(
184
+ sources=["microphone", "upload"],
185
+ type="filepath",
186
+ label="Speak your question"
187
+ )
188
+
189
+ # Process button
190
+ process_btn = gr.Button("Process Voice", variant="primary")
191
+
192
+ # Clear history button
193
+ clear_btn = gr.Button("Clear History", variant="secondary")
194
+
195
+ with gr.Column(scale=1):
196
+ # Transcribed text output
197
+ transcription_output = gr.Textbox(
198
+ label="What you said:",
199
+ interactive=False
200
+ )
201
+
202
+ # Audio response output
203
+ audio_output = gr.Audio(
204
+ label="AI Response (Audio)",
205
+ interactive=False
206
+ )
207
+
208
+ # Conversation history
209
+ with gr.Row():
210
+ conversation_history = gr.Textbox(
211
+ label="Conversation History",
212
+ lines=10,
213
+ interactive=False
214
+ )
215
+
216
+ # Event handlers
217
+ process_btn.click(
218
+ fn=bot.process_voice_input,
219
+ inputs=[audio_input],
220
+ outputs=[transcription_output, audio_output, conversation_history]
221
+ )
222
+
223
+ clear_btn.click(
224
+ fn=bot.clear_history,
225
+ inputs=[],
226
+ outputs=[transcription_output, conversation_history]
227
+ )
228
+
229
+ # Example usage
230
+ gr.Markdown("## Example Queries")
231
+ gr.Markdown("""
232
+ Try asking about:
233
+ - Order status
234
+ - Return policy
235
+ - Shipping information
236
+ - Business hours
237
+ - Technical support
238
+ """)
239
+
240
+ return demo
241
 
242
+ # Launch the interface
243
  if __name__ == "__main__":
244
+ demo = create_interface()
245
+ demo.launch(share=True)