"""Voice-enabled RAG assistant.

Pipeline: receive raw 96 kHz audio bytes (e.g. from a web recorder widget),
save and downsample them to 44 kHz WAV, transcribe with Google speech
recognition, then answer the transcribed query with a Groq-hosted LLM,
grounded on document chunks retrieved from a FAISS vector store.
"""

# --- Standard library ---
import io    # In-memory byte streams (kept: callers may pass BytesIO data)
import os    # Environment variables (Groq API key)
import wave  # WAV container handling

# --- Third-party: audio ---
import librosa                      # Resampling
import numpy as np                  # Audio buffers as arrays
import speech_recognition as sr     # Wrapper around Google speech-to-text
from scipy.io.wavfile import write  # Writing WAV files

# --- Third-party: LLM / retrieval / UI ---
import streamlit as st
from groq import Groq
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

# Sample rates used by the capture/resample pipeline.
CAPTURE_RATE = 96_000  # rate of the raw audio received from the recorder
TARGET_RATE = 44_000   # rate expected by the transcription step


def downsample_audio(audio_data):
    """Resample capture audio from 96 kHz to 44 kHz and save it as 'aud.wav'.

    Parameters
    ----------
    audio_data : np.ndarray
        1-D int16 PCM samples recorded at 96 kHz.
    """
    # librosa requires floating-point input. The values keep the int16
    # scale (no normalisation), so the cast back to int16 below is lossless
    # apart from resampling itself.
    samples = audio_data.astype(np.float32)
    resampled = librosa.resample(samples, orig_sr=CAPTURE_RATE, target_sr=TARGET_RATE)
    write('aud.wav', TARGET_RATE, resampled.astype(np.int16))


def save_audio_from_bytes(audio_bytes):
    """Persist raw audio bytes to 'audio.wav' and produce the 44 kHz 'aud.wav'.

    Parameters
    ----------
    audio_bytes : bytes
        Little-endian int16 PCM samples at 96 kHz (typical output of a
        browser recorder component).
    """
    # Interpret the byte buffer as int16 samples.
    audio_array = np.frombuffer(audio_bytes, dtype=np.int16)
    # Keep the original high-sample-rate recording for reference.
    write('audio.wav', CAPTURE_RATE, audio_array)
    # Produce the downsampled copy used by transcript().
    downsample_audio(audio_array)


def run_groq(prompt, model):
    """Send *prompt* to *model* on the Groq platform and return the reply text.

    Parameters
    ----------
    prompt : str
        The full user prompt.
    model : str
        Groq model identifier, e.g. 'llama3-70b-8192'.

    Returns
    -------
    str
        The assistant message content generated by the model.
    """
    if model == 'llama3-70b-8192':
        # Crude guard against over-long prompts for this model.
        # NOTE: the 8192 limit is in *tokens*, not characters, so this
        # character-level truncation is only a rough approximation.
        prompt = prompt[:8191]
    # SECURITY: the API key used to be hard-coded here. It is now read from
    # the environment; the previously committed key must be rotated, since
    # it was exposed in version control.
    client = Groq(api_key=os.environ.get('GROQ_API_KEY'))
    chat_completion = client.chat.completions.create(
        messages=[{'role': 'user', 'content': prompt}],
        model=model,
    )
    return chat_completion.choices[0].message.content


def transcript():
    """Transcribe 'aud.wav' with Google speech recognition and return the text.

    Returns
    -------
    str
        The recognised transcript.

    Raises
    ------
    sr.UnknownValueError
        If the audio could not be understood.
    sr.RequestError
        If the Google API could not be reached.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile('aud.wav') as source:
        audio = recognizer.record(source)
    # NOTE(review): recognition language is English ('en') although the RAG
    # prompt instructs the model to answer in Spanish — confirm whether this
    # should be 'es' for Spanish-speaking users.
    text = recognizer.recognize_google(audio, language='en')
    return text


def run_rag(query, vc):
    """Answer *query* via retrieval-augmented generation over *vc*.

    Parameters
    ----------
    query : str
        The user's question.
    vc : vector store
        Object exposing similarity_search(query, k=...) returning documents
        with a .page_content attribute (e.g. a FAISS store).

    Returns
    -------
    str
        The model's answer.
    """
    # Retrieve the 10 most similar document chunks as grounding context.
    similar_docs = vc.similarity_search(query, k=10)
    context = '\n'.join(doc.page_content for doc in similar_docs)
    # Surface the retrieved context in the sidebar for inspection/debugging.
    st.sidebar.write(context)
    # The instructions tell the model the context arrives "inside XML tags",
    # so the context is actually wrapped in <context> tags here (the
    # original prompt promised tags but never emitted them).
    prompt = f'''
Your name is Emma and you are a virtual physiotherapist ,use the following context as your learned knowledge, inside XML tags.
<context>
{context}
</context>
The context is taken from a set of pdfs
When answer to user:
- If you don't know, just say that you don't know.
- If you don't know when you are not sure, ask for clarification.
Avoid mentioning that you obtained the information from the context.
And answer according to the language of the user's question.
Make your answers detailed
- Return your answer in Spanish
Given the context information, answer the query.
Query: {query}
'''
    response = run_groq(prompt, 'llama3-70b-8192')
    return response