import os
import openai
import time
import numpy as np
from numpy import True_
import gradio as gr
from gradio_rich_textbox import RichTextbox
import soundfile as sf
from pydub import AudioSegment
from openai import OpenAI
########## For creating a debug report
# import subprocess
# myGradioEnvironment = subprocess.run(['gradio','environment'], stdout=subprocess.PIPE)
# print(myGradioEnvironment.stdout.decode('utf-8'))
# Load API key from an environment variable
# NOTE(review): os.environ.get returns None if the variable is unset; the
# OpenAI client will then fail on first use rather than here.
OPENAI_SECRET_KEY = os.environ.get("OPENAI_SECRET_KEY")
client = OpenAI(api_key = OPENAI_SECRET_KEY)
# Most recently generated note; written by transcribe() (declared global there).
note_transcript = ""
def transcribe(audio, history_type):
    """Transcribe a recorded audio file with Whisper and format it as a note with GPT-4o.

    Parameters
    ----------
    audio : str or None
        Filesystem path to the recording (Gradio ``type="filepath"``); ``None``
        if the user submitted without recording.
    history_type : str
        Radio-button label selecting the system-prompt file in ``Format_Library/``.

    Returns
    -------
    list or None
        ``[note_transcript, num_words, mp3_megabytes]`` on success; ``None``
        when the audio file could not be read or transcription failed.
    """
    global note_transcript
    print(f"Received audio file path: {audio}")

    # Map the UI choice to its prompt file; fall back to the full-visit format.
    history_type_map = {
        "History Simple": "McIntyre_Short_History.txt",
        "History Complex": "McIntyre_Long_History.txt",
        "Full Visit": "McIntyre_Full_Visit.txt",
        "Impression/Plan": "McIntyre_Impression.txt",
        "EMS": "McIntyre_EMS_Handover.txt",
        "Dx/DDx": "McIntyre_Dx_DDx_Format.txt",
        "Feedback": "McIntyre_Feedback.txt",
        "Hallway Consult": "McIntyre_Hallway_Consult_Format.txt"
    }
    file_name = history_type_map.get(history_type, "McIntyre_Full_Visit.txt")
    with open(f"Format_Library/{file_name}", "r") as f:
        role = f.read()
    messages = [{"role": "system", "content": role}]

    # Read the recording from disk, retrying briefly on transient failures.
    max_attempts = 1
    attempt = 0
    audio_data = None
    samplerate = None
    while attempt < max_attempts:
        try:
            if audio is None:
                raise TypeError("Invalid file: None")
            audio_data, samplerate = sf.read(audio)
            break
        except (OSError, TypeError) as e:
            print(f"Attempt {attempt + 1} of {max_attempts} failed with error: {e}")
            attempt += 1
            time.sleep(3)
    else:
        print(f"###############Failed to open audio file after {max_attempts} attempts.##############")
        return  # Terminate the function or raise an exception if the file could not be opened

    # Convert to .mp3 (smaller upload for the Whisper endpoint).
    # BUGFIX: the original code then overwrote test.mp3 with sf.write(),
    # clobbering the pydub export just made; that redundant write is removed.
    sf.write("Audio_Files/test.wav", audio_data, samplerate, subtype='PCM_16')
    sound = AudioSegment.from_wav("Audio_Files/test.wav")
    sound.export("Audio_Files/test.mp3", format="mp3")

    # Send the file to Whisper, retrying transient connection errors.
    # BUGFIX: v1 SDK exposes openai.APIConnectionError (openai.error.* no
    # longer exists); the file handle is now closed via `with` and rewound
    # before each retry so a second attempt does not upload an empty stream.
    audio_transcript = None
    max_attempts = 3
    attempt = 0
    with open("Audio_Files/test.mp3", "rb") as audio_file:
        while attempt < max_attempts:
            try:
                audio_transcript = client.audio.transcriptions.create(model="whisper-1", file=audio_file)
                break
            except openai.APIConnectionError as e:
                print(f"Attempt {attempt + 1} failed with error: {e}")
                attempt += 1
                time.sleep(3)  # wait for 3 seconds before retrying
                audio_file.seek(0)
        else:
            print("Failed to transcribe audio after multiple attempts")
            # BUGFIX: the original fell through here and crashed with a
            # NameError on audio_transcript.text; bail out instead.
            return

    print(audio_transcript.text)
    messages.append({"role": "user", "content": audio_transcript.text})

    # Word and megabyte counts reported back to the UI.
    file_size = os.path.getsize("Audio_Files/test.mp3")
    mp3_megabytes = file_size / (1024 * 1024)
    mp3_megabytes = round(mp3_megabytes, 2)
    audio_transcript_words = audio_transcript.text.split()
    num_words = len(audio_transcript_words)

    # Ask GPT-4o to format the raw transcript into the selected note style.
    response = client.chat.completions.create(model="gpt-4o", temperature=0, messages=messages)
    note_transcript = response.choices[0].message.content
    print(note_transcript)
    return [note_transcript, num_words, mp3_megabytes]
# Gradio input components: a microphone recording (delivered to transcribe()
# as a path to a .wav file) plus a radio selector for the note format.
my_inputs = [
    gr.Microphone(format="wav", type="filepath"),  # Gradio 4.x
    gr.Radio(
        ["History Simple","History Complex","Full Visit","Impression/Plan","EMS","Dx/DDx","Feedback","Hallway Consult"],
        show_label=False,
    ),
]
# Build and launch the web UI.
# BUGFIX: transcribe() returns THREE values (note text, word count, mp3 MB),
# but only two output components were declared, breaking Gradio's mapping of
# return values to outputs; a "Words" Number component is added in position 2.
ui = gr.Interface(fn=transcribe,
                  inputs=my_inputs,
                  outputs=[gr.Textbox(label="Your Note (gpt-4o)", show_copy_button=True),
                           gr.Number(label="Words"),
                           gr.Number(label=".mp3 MB")],
                  title="Jenkins",
                  )
# Inject a custom <title> into the served HTML page.
ui.config['template'] = '<!DOCTYPE html><html><title>Jenkins</title><body>{}</body></html>'
ui.launch(share=False, debug=True)