cloud-sean committed on
Commit
c73560f
·
1 Parent(s): 2786061

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +277 -0
app.py ADDED
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os, uuid, json
3
+ import requests
4
+ import os
5
+ import openai
6
+ import time
7
+ from tempfile import NamedTemporaryFile
8
+ from st_audiorec import st_audiorec
9
+ from azure.identity import DefaultAzureCredential
10
+ from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
11
+ from datetime import datetime
12
+ from pydub import AudioSegment
13
+
14
# Azure OpenAI Whisper (speech-to-text) endpoint and deployment name.
AOAI_ENDPOINT = "https://whisper-aoai-sean.openai.azure.com"
WHISPER_DEPLOYMENT_NAME = "whisper"


# Secrets come from the environment; values are None when unset.
AOAI_KEY = os.environ.get("AOAI_KEY")
# Whisper "prompt" biases transcription toward this conversational domain.
WHISPER_PROMPT = "The following is a conversation between a doctor and a patient."
# System prompts used by the GPT summarisation step.
AOAI_PROMPT_DOCTOR = "I am a doctor. create a summary of this patient encounter for me. respond in the same language as the text was given in."
AOAI_PROMPT_STANDARD = "Summerize this text. Call out key points. Return in markdown format."
AZURE_BLOB_CONNECTION_STRING = os.environ.get("AZURE_BLOB_CONNECTION_STRING")
TRANSCRIPTION_API_KEY = os.environ.get("TRANSCRIPTION_API_KEY")
# Holds the recorded/uploaded audio for the current Streamlit rerun.
wav_audio_data = None
26
+
27
+
28
# Azure OpenAI (chat/GPT-4) client configuration.
openai.api_type = "azure"
openai.api_base = aoai_endpoint = "https://eastus-openai-sean.openai.azure.com/"
# SECURITY: the chat API key was previously hard-coded here and committed to
# source control (that key must be rotated). Read it from the environment,
# consistent with how the other secrets in this file are handled.
openai.api_key = aoai_key = os.environ.get("AOAI_CHAT_KEY", "")
openai.api_version = "2023-07-01-preview"
32
+
33
# One-time Streamlit session-state initialisation: seed every key this app
# reads with a default so later accesses never KeyError across reruns.
# (Replaces seven copy-pasted `if key not in st.session_state` blocks.)
_SESSION_DEFAULTS = {
    "hebrew_mode": '',             # replaced by the bool from the UI toggle below
    "summary": '',                 # GPT summary (markdown)
    "request_status": "Pending",   # batch-transcription job status string
    "transcription": '',
    "recording": 'na',
    "clicked": False,              # set True by the Upload button callback
    "raw_transcription": '',
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
54
+
55
def click_button():
    """Button callback: record that the user pressed Upload."""
    st.session_state.clicked = True
57
+
58
def create_transcription_request(blob_url):
    """Start an Azure Speech batch-transcription job for an uploaded blob.

    Args:
        blob_url: URL of the mono WAV file in Azure Blob Storage.

    Returns:
        The job's "self" polling URL on success; otherwise renders a
        Streamlit error and returns its result.
    """
    url = "https://eastus.api.cognitive.microsoft.com/speechtotext/v3.2-preview.1/transcriptions"
    # Locale follows the UI toggle: Hebrew when enabled, US English otherwise.
    locale = "he-il" if st.session_state.hebrew_mode else "en-us"
    payload = {
        "displayName": "20231106_182337",
        "description": "Speech Studio Batch speech to text",
        "locale": locale,
        "contentUrls": [blob_url],
        "model": {
            "self": "https://eastus.api.cognitive.microsoft.com/speechtotext/v3.2-preview.1/models/base/e830341e-8f47-4e0a-b64c-3f66167b751c"
        },
        "properties": {
            "wordLevelTimestampsEnabled": False,
            "displayFormWordLevelTimestampsEnabled": False,
            # Split the audio into (at most two) speakers: doctor + patient.
            "diarizationEnabled": True,
            "diarization": {"speakers": {"minCount": 1, "maxCount": 2}},
            "punctuationMode": "DictatedAndAutomatic",
            "profanityFilterMode": "Masked"
        },
        "customProperties": {}
    }
    headers = {'Ocp-Apim-Subscription-Key': TRANSCRIPTION_API_KEY}
    # `json=` serialises the payload and sets Content-Type for us; a timeout
    # prevents the UI from hanging forever on a stalled request.
    response = requests.post(url, headers=headers, json=payload, timeout=30)
    if response.status_code != 201:
        return st.error("Error creating transcription request")
    return response.json()["self"]
98
+
99
def attempt_to_get_transcription(transcription_url):
    """Poll the batch-transcription job once and return its status string."""
    auth_headers = {
        'Ocp-Apim-Subscription-Key': TRANSCRIPTION_API_KEY,
        'Content-Type': 'application/json'
    }
    response = requests.get(transcription_url, headers=auth_headers)
    job = response.json()
    return job["status"]
106
+
107
def extract_conversation(json_data):
    """Render a batch-transcription result JSON string as a conversation.

    Args:
        json_data: Raw JSON text of the transcription result file.

    Returns:
        One "Person N: text" line per recognised phrase, in chronological
        order. Each line carries a trailing " \\n" and lines are joined
        with "\\n", so speakers render with blank lines between them in
        markdown.
    """
    data = json.loads(json_data)

    recognized_phrases = data.get("recognizedPhrases", [])

    # The service usually returns phrases in order, but sort defensively by
    # start offset so diarized speakers interleave correctly.
    recognized_phrases.sort(key=lambda x: x.get("offsetInTicks", 0))

    conversation = []
    for phrase in recognized_phrases:
        speaker = f"Person {phrase.get('speaker')}"
        # nBest is ordered by confidence; take the top hypothesis. Guard
        # against phrases with a missing/empty nBest instead of raising
        # KeyError/IndexError as the previous direct indexing did.
        n_best = phrase.get('nBest') or [{}]
        text = n_best[0].get('display', '')
        conversation.append(f"{speaker}: {text} \n")

    return '\n'.join(conversation)
127
+
128
def get_final_transcription(transcription_url):
    """Download the finished job's transcription file and format it.

    Args:
        transcription_url: The transcription job's "self" URL.

    Returns:
        The formatted conversation string (see extract_conversation).

    Raises:
        ValueError: If the job's file listing contains no "Transcription"
            entry. (Previously the raw list was passed straight to
            requests.get, producing an obscure crash.)
    """
    headers = {
        'Ocp-Apim-Subscription-Key': TRANSCRIPTION_API_KEY,
        'Content-Type': 'application/json'
    }
    files_url = f"{transcription_url}/files"
    files = requests.get(files_url, headers=headers).json()["values"]

    # The listing mixes report/log entries with the actual result; pick the
    # first entry of kind "Transcription".
    content_url = next(
        (item["links"]["contentUrl"] for item in files
         if item["kind"] == "Transcription"),
        None,
    )
    if content_url is None:
        raise ValueError("Transcription job returned no result file")

    result = requests.get(content_url, headers=headers)
    return extract_conversation(result.text)
142
+
143
def upload_audio(audio_bytes):
    """Persist recorded/uploaded audio to Azure Blob Storage as mono WAV.

    Args:
        audio_bytes: Raw WAV bytes (from the recorder) or a file-like
            upload object exposing getbuffer().

    Returns:
        The uploaded blob's URL on success; on failure renders a Streamlit
        error and returns its result instead.
    """
    # Timestamp-based blob name so successive recordings don't collide.
    filename = datetime.now().strftime("%Y%m%d_%H%M%S") + ".wav"

    # Stage the payload on disk so pydub can read it.
    with NamedTemporaryFile(delete=False) as f:
        if isinstance(audio_bytes, bytes):
            f.write(audio_bytes)
        else:
            f.write(audio_bytes.getbuffer())
        temp_filename = f.name

    mono_filename = f"{temp_filename}.wav"
    try:
        # Downmix to a single channel before uploading (matches the
        # diarization configuration used by the transcription job).
        sound = AudioSegment.from_wav(temp_filename)
        sound.set_channels(1).export(mono_filename, format="wav")

        blob_service_client = BlobServiceClient.from_connection_string(
            AZURE_BLOB_CONNECTION_STRING)
        blob_client = blob_service_client.get_blob_client(
            container="audiofiles", blob=filename)

        with open(mono_filename, "rb") as data:
            blob_client.upload_blob(data)
        return blob_client.url
    except Exception:
        # Surface the failure in the UI instead of crashing the app, but
        # avoid the previous bare `except:` that also swallowed SystemExit.
        return st.error("Error uploading to Azure Blob Storage")
    finally:
        # NamedTemporaryFile(delete=False) leaks files unless we remove
        # both the staged original and the mono export ourselves.
        for path in (temp_filename, mono_filename):
            try:
                os.remove(path)
            except OSError:
                pass
170
+
171
def summerize_with_gpt(text, additional="Standard"):
    """Summarise *text* with the Azure OpenAI GPT-4 (32k) chat deployment.

    Args:
        text: The transcript or raw text to summarise.
        additional: Extra instructions appended to the doctor prompt
            (e.g. "Bullet" or "Paragraph"); defaults to "Standard".

    Returns:
        The assistant message content of the first completion choice.
    """
    system_prompt = f"{AOAI_PROMPT_DOCTOR} \n {additional}"
    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]
    completion = openai.ChatCompletion.create(
        engine="gpt-4-32k",
        messages=chat_messages,
        temperature=0.2,
        max_tokens=1200,
        top_p=0.95,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None,
    )
    return completion.choices[0].message.content
183
+
184
def transcribe(audio_bytes):
    """Transcribe audio synchronously via the Azure OpenAI Whisper deployment.

    Args:
        audio_bytes: WAV audio bytes to transcribe.

    Returns:
        The parsed JSON response from the Whisper endpoint.
    """
    url = (f"{AOAI_ENDPOINT}/openai/deployments/{WHISPER_DEPLOYMENT_NAME}"
           f"/audio/transcriptions")
    # Send the key as a header and let requests percent-encode the query:
    # the previous hand-built URL leaked the API key into request logs and
    # never URL-encoded the prompt text (which contains spaces).
    headers = {"api-key": AOAI_KEY}
    params = {
        "prompt": WHISPER_PROMPT,
        "api-version": "2023-09-01-preview",
    }
    files = [
        ('file', ('Recording.wav', audio_bytes, 'application/octet-stream'))
    ]
    response = requests.post(url, headers=headers, params=params, files=files)
    return response.json()
194
+
195
# ---------------------------------------------------------------------------
# UI: page layout plus the upload -> transcribe -> summarise pipeline.
# ---------------------------------------------------------------------------
st.title("Summerizer 🧬")

# Fixed "Hebew" label typo.
st.session_state.hebrew_mode = st.toggle("Hebrew", False)

select_container = st.empty()
text_box = st.empty()
request_completed = False
tmp = ""  # last status shown, so we only write status lines on change
# Right-to-left wrapper for Hebrew output. NOTE: the style attribute was
# previously missing its closing quote; currently unused below.
html_right = "<div style='text-align: right;'>"

# Defaults so the branches below can safely test these even when the user
# never selected "Text" mode (previously a NameError).
text_data = None
summary_types = ""


with select_container.container():
    select = st.selectbox("Upload or Record", ("Upload", "Record", "Text"))
    if select == "Record":
        wav_audio_data = st_audiorec()
    elif select == "Upload":
        wav_audio_data = st.file_uploader("Upload Audio", type=["wav"])
    elif select == "Text":
        text_data = st.text_area("Enter Text")
        summary_types = st.text_input("Enter Summary Type etc. (Standard, Bullet, or Paragraph)")
    done_speech_button = st.button("Upload", on_click=click_button)

if st.session_state.clicked:
    if wav_audio_data is not None:
        st.session_state.clicked = False
        with st.spinner("Uploading to Azure Blob storage..."):
            blob_url = upload_audio(wav_audio_data)
            st.toast("Successfully Uploaded!", icon="✅")
        with st.status("Using Azure Speech with OpenAI's Whisper to transcribe..."):
            transcription_request = create_transcription_request(blob_url)
            time.sleep(1)
            st.write("Transcription Request Created!")
            st.toast("Successfully Created Transcription Request!", icon="✅")

            while not request_completed:
                request_status = attempt_to_get_transcription(transcription_request)
                if tmp != request_status:
                    st.write(f"Transcription Status: {request_status}")
                tmp = request_status

                if request_status == "Succeeded":
                    st.write("Transcription Complete!")
                    st.toast("Successfully Transcribed!", icon="✅")
                    request_completed = True
                    st.write("Grabbing Transcription...")
                    time.sleep(1)
                    raw_transcription = get_final_transcription(transcription_url=transcription_request)
                    st.write("Successfully Grabbed Transcription!")
                    with st.expander("Transcription", False):
                        if st.session_state.hebrew_mode:
                            # Hebrew is right-to-left; align accordingly.
                            st.markdown(f"<div style='text-align: right;'> {raw_transcription} </div>", unsafe_allow_html=True)
                        else:
                            st.session_state.raw_transcript = st.markdown(f"{raw_transcription}")
                    with st.status("Using GPT-4 to summerize..."):
                        st.write("Starting up the GPUs!")
                        st.session_state.summary = summerize_with_gpt(raw_transcription)
                        st.write("Successfully Summerized!")
                        st.toast("Successfully Summerized!", icon="✅")
                    with st.expander("Summary", False):
                        if st.session_state.hebrew_mode:
                            st.markdown(f"<div style='text-align: right;'> {st.session_state.summary} </div>", unsafe_allow_html=True)
                        else:
                            st.markdown(f"{st.session_state.summary}", unsafe_allow_html=True)
                elif request_status == "Failed":
                    # Previously a failed job left this loop polling forever.
                    st.error("Transcription failed")
                    request_completed = True
                else:
                    # Still queued/running — wait before polling again so we
                    # don't hammer the service.
                    time.sleep(1)
    elif text_data is not None:
        st.session_state.clicked = False
        with st.status("Using GPT-4 to summerize..."):
            st.write("Starting up the GPUs!")
            st.session_state.summary = summerize_with_gpt(text_data, summary_types)
            st.write("Successfully Summerized!")
            st.toast("Successfully Summerized!", icon="✅")
        with st.expander("Summary", False):
            if st.session_state.hebrew_mode:
                st.markdown(f"<div style='text-align: right;'> {st.session_state.summary} </div>", unsafe_allow_html=True)
            else:
                st.markdown(f"{st.session_state.summary}", unsafe_allow_html=True)
    else:
        st.error("Please upload or record audio")
        st.session_state.clicked = False