# trixareformen's picture
# Update scripts/services/services.py
# 7dcdfbb verified
# Importing necessary libraries:
# --- Standard library ---
import io # Input/Output operations, helps with working with data in memory
import os # Read the Groq API key from the environment (never hard-code it)
import wave # Handles WAV audio files
# --- Third party: speech recognition and language processing ---
import speech_recognition as sr # Speech recognition capabilities
from groq import Groq # Groq client for interacting with Groq AI platform
import numpy as np # Powerful numerical computations (handling audio data)
from scipy.io.wavfile import write # Writing audio files
import librosa # Audio analysis (resampling audio)
# --- Vectorstore and language model interactions ---
from langchain_community.vectorstores import FAISS # Efficient similarity search for documents
from langchain_huggingface import HuggingFaceEmbeddings # Embeddings for language models
import streamlit as st # Creating interactive web applications
# Downsample audio (by default from 96 kHz to 44 kHz) and write it to a WAV file
def downsample_audio(audio_data, orig_sr=96000, target_sr=44000, out_path='aud.wav'):
    """Resample PCM audio and save it as a 16-bit WAV file.

    The sample rates and output path were previously hard-coded; they are now
    parameters with the original values as defaults, so existing callers are
    unaffected.

    Parameters
    ----------
    audio_data : np.ndarray
        1-D array of PCM samples (int16 expected from the capture path).
    orig_sr : int, optional
        Sample rate of the incoming audio in Hz (default 96000).
    target_sr : int, optional
        Sample rate to resample to in Hz (default 44000).
    out_path : str, optional
        Destination WAV file (default 'aud.wav', consumed by transcript()).
    """
    # librosa.resample requires floating-point input
    samples = audio_data.astype(np.float32)
    # Change the sample rate from orig_sr to target_sr
    resampled = librosa.resample(samples, orig_sr=orig_sr, target_sr=target_sr)
    # Cast back to int16 PCM before writing the WAV file.
    # NOTE(review): the float samples are not re-scaled before the cast —
    # this mirrors the original behavior; confirm amplitudes stay in int16 range.
    write(out_path, target_sr, resampled.astype(np.int16))
# Persist raw audio bytes (typically received from a web widget) to disk
def save_audio_from_bytes(audio_bytes):
    """Save the raw 96 kHz recording to 'audio.wav', then create the
    downsampled 'aud.wav' used by the transcription step.
    """
    # Interpret the raw bytes as 16-bit PCM samples
    samples = np.frombuffer(audio_bytes, dtype=np.int16)
    # Keep a copy of the original high-sample-rate recording
    write('audio.wav', 96000, samples)
    # Produce the 44 kHz version for further processing
    downsample_audio(samples)
# Interact with the Groq platform to get a response from a language model
def run_groq(prompt, model):
    """Send `prompt` to `model` on Groq and return the generated text.

    Parameters
    ----------
    prompt : str
        The user message to send.
    model : str
        Groq model identifier (e.g. 'llama3-70b-8192').

    Returns
    -------
    str
        The content of the first completion choice.
    """
    # Crude guard against overlong prompts for the 8192-context llama3 model.
    # NOTE(review): this truncates *characters*, not tokens — confirm intent.
    if model == 'llama3-70b-8192':
        prompt = prompt[:8191]
    # SECURITY FIX: the API key was hard-coded in source (and therefore leaked
    # with the repository). Read it from the environment instead; set the
    # GROQ_API_KEY variable before running the app.
    client = Groq(api_key=os.environ.get('GROQ_API_KEY'))
    # Send the prompt to the specified language model on Groq
    chat_completion = client.chat.completions.create(
        messages=[
            {'role': 'user', 'content': prompt}
        ],
        model=model,
    )
    # Return only the text of the first choice
    return chat_completion.choices[0].message.content
# Transcribe the downsampled 'aud.wav' recording (Spanish speech)
def transcript():
    """Transcribe 'aud.wav' using Google speech recognition and return the text.

    Returns
    -------
    str
        The recognized Spanish transcription.

    Raises
    ------
    sr.UnknownValueError, sr.RequestError
        Propagated from recognize_google on failure.
    """
    # Create a speech recognizer object
    recognizer = sr.Recognizer()
    # Open the downsampled audio file produced by downsample_audio()
    with sr.AudioFile('aud.wav') as source:
        # Read the entire audio stream from the file
        audio = recognizer.record(source)
        # BUG FIX: the surrounding comments state the audio is Spanish (and the
        # RAG prompt answers in Spanish), but the code requested English
        # ('en'). Use the Spanish language code so recognition matches.
        text = recognizer.recognize_google(audio, language='es')
    # Return the transcribed text
    return text
# Perform Retrieval Augmented Generation (RAG) to answer queries using context
def run_rag(query, vc):
    """Answer `query` using document chunks retrieved from vectorstore `vc`.

    Parameters
    ----------
    query : str
        The user's question.
    vc : vectorstore (e.g. FAISS)
        Object exposing similarity_search(query, k) returning documents with
        a .page_content attribute.

    Returns
    -------
    str
        The model's answer (in Spanish, per the prompt instructions).
    """
    # Retrieve the 10 most similar document chunks for the query
    similar_docs = vc.similarity_search(query, k=10)
    # Concatenate their contents into a single context string
    context = '\n'.join(doc.page_content for doc in similar_docs)
    # Surface the retrieved context in the sidebar for inspection/debugging
    st.sidebar.write(context)
    # Build the prompt. FIX: corrected typos in the original instructions that
    # were sent verbatim to the model ("yout answer" -> "your answer",
    # "When answer to user" -> "When answering the user", and the garbled
    # "If you don't know when you are not sure" rule).
    prompt = f'''
Your name is Emma and you are a virtual physiotherapist. Use the following context as your learned knowledge, inside <context></context> XML tags.
<context>
{context}
</context>
The context is taken from a set of PDFs.
When answering the user:
- If you don't know, just say that you don't know.
- If you are not sure, ask for clarification.
Avoid mentioning that you obtained the information from the context.
And answer according to the language of the user's question.
Make your answers detailed.
- Return your answer in Spanish.
Given the context information, answer the query.
Query: {query}
'''
    # Query the llama3-70b-8192 model on Groq (comment previously said llama2)
    response = run_groq(prompt, 'llama3-70b-8192')
    # Return the generated response
    return response