RKP64 committed on
Commit
86a8fc9
·
1 Parent(s): d9a2795

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +133 -0
app.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import openai
3
+ import streamlit as st
4
+ from audio_recorder_streamlit import audio_recorder
5
+ from elevenlabs import generate
6
+ from langchain.chains import RetrievalQA
7
+ from langchain.chat_models import ChatOpenAI
8
+ from langchain.embeddings.openai import OpenAIEmbeddings
9
+ from langchain.vectorstores import DeepLake
10
+ from streamlit_chat import message
11
+ from dotenv import load_dotenv
12
+
13
# Load environment variables (API keys, dataset path) from a local .env file.
load_dotenv()

# Constants
TEMP_AUDIO_PATH = "temp_audio.wav"  # scratch file for the browser recording
AUDIO_FORMAT = "audio/wav"          # MIME type passed to st.audio playback

# Read configuration from the environment (populated by load_dotenv above).
# NOTE(review): these are None if the variables are unset — downstream calls
# will fail at use time rather than here.
openai.api_key = os.environ.get('OPENAI_API_KEY')
eleven_api_key = os.environ.get('ELEVEN_API_KEY')
active_loop_data_set_path = os.environ.get('DEEPLAKE_DATASET_PATH')
24
+
25
# Load embeddings and DeepLake database
def load_embeddings_and_database(active_loop_data_set_path):
    """Open the DeepLake vector store at *active_loop_data_set_path*.

    The store is opened read-only and backed by OpenAI embeddings; the
    ready-to-query DeepLake instance is returned.
    """
    return DeepLake(
        dataset_path=active_loop_data_set_path,
        read_only=True,
        embedding_function=OpenAIEmbeddings(),
    )
34
+
35
# Transcribe audio using OpenAI Whisper API
def transcribe_audio(audio_file_path, openai_key):
    """Transcribe the audio file at *audio_file_path* with OpenAI Whisper.

    Returns the transcribed text, or None when the API call fails
    (best-effort: the error is printed, not raised).
    """
    openai.api_key = openai_key
    try:
        with open(audio_file_path, "rb") as audio_file:
            return openai.Audio.transcribe("whisper-1", audio_file)["text"]
    except Exception as err:
        print(f"Error calling Whisper API: {str(err)}")
        return None
45
+
46
# Record audio using audio_recorder and transcribe using transcribe_audio
def record_and_transcribe_audio():
    """Record audio in the browser, play it back, and transcribe on demand.

    Returns the transcription text, or None when nothing was recorded or
    the user has not pressed the Transcribe button.
    """
    transcription = None
    audio_bytes = audio_recorder()
    if not audio_bytes:
        return transcription

    st.audio(audio_bytes, format=AUDIO_FORMAT)

    # Persist the recording so the Whisper helper can read it from disk.
    with open(TEMP_AUDIO_PATH, "wb") as temp_file:
        temp_file.write(audio_bytes)

    if st.button("Transcribe"):
        transcription = transcribe_audio(TEMP_AUDIO_PATH, openai.api_key)
        os.remove(TEMP_AUDIO_PATH)  # scratch file no longer needed
        display_transcription(transcription)

    return transcription
62
+
63
# Display the transcription of the audio on the app
def display_transcription(transcription):
    """Show the transcription in the UI and mirror it to a text file.

    Shows an error message instead when *transcription* is falsy.
    """
    if not transcription:
        st.write("Error transcribing audio.")
        return

    st.write(f"Transcription: {transcription}")
    with open("audio_transcription.txt", "w+") as outfile:
        outfile.write(transcription)
71
+
72
# Get user input from Streamlit text input field
def get_user_input(transcription):
    """Return the user's query from a text box, pre-filled with *transcription*."""
    default_text = transcription if transcription else ""
    return st.text_input("", value=default_text, key="input")
75
+
76
# Search the database for a response based on the user's query
def search_db(user_input, db):
    """Answer *user_input* with a RetrievalQA chain over the DeepLake store *db*.

    Returns the chain's result dict, which contains 'result' (the answer)
    and 'source_documents' (the retrieved context passages).
    """
    # Retriever configuration: cosine distance, MMR re-ranking of 100
    # candidate chunks down to the top 10.
    retriever = db.as_retriever()
    retriever.search_kwargs['distance_metric'] = 'cos'
    retriever.search_kwargs['fetch_k'] = 100
    retriever.search_kwargs['maximal_marginal_relevance'] = True
    retriever.search_kwargs['k'] = 10

    model = ChatOpenAI(model='gpt-3.5-turbo')
    qa = RetrievalQA.from_llm(model, retriever=retriever, return_source_documents=True)
    return qa({'query': user_input})
87
+
88
# Display conversation history using Streamlit messages
def display_conversation(history):
    """Render the chat history and speak each assistant reply via ElevenLabs.

    *history* holds parallel 'past' (user) and 'generated' (assistant)
    message lists, as stored in Streamlit session state.
    """
    for idx, reply in enumerate(history["generated"]):
        message(history["past"][idx], is_user=True, key=str(idx) + "_user")
        message(reply, key=str(idx))
        # Voice using Eleven API
        audio = generate(text=reply, voice="Anish de", api_key=eleven_api_key)
        st.audio(audio, format='audio/mp3')
98
+
99
# Main function to run the app
def main():
    """Streamlit entry point: wire together recording, retrieval, and display."""
    # Page title
    st.write("# KPMG VOICE GPT")

    # Read-only handle on the DeepLake knowledge base.
    db = load_embeddings_and_database(active_loop_data_set_path)

    # Optional voice input feeds the text box as its default value.
    transcription = record_and_transcribe_audio()
    user_input = get_user_input(transcription)

    # Seed the conversation on first run only.
    if "generated" not in st.session_state:
        st.session_state["generated"] = ["I am ready to help you"]
    if "past" not in st.session_state:
        st.session_state["past"] = ["Hey there!"]

    # Answer the query and extend the conversation history.
    if user_input:
        output = search_db(user_input, db)
        print(output['source_documents'])
        st.session_state.past.append(user_input)
        st.session_state.generated.append(str(output["result"]))

    # Replay the whole conversation (with TTS) on every rerun.
    if st.session_state["generated"]:
        display_conversation(st.session_state)
130
+
131
# Run the main function when the script is executed directly
# (e.g. `streamlit run app.py`).
if __name__ == "__main__":
    main()