File size: 5,459 Bytes
383fc57
 
eea11c3
c664abc
e8df9a7
 
5fac707
e8df9a7
 
 
2048eff
 
1e6668b
4d00fc6
 
 
69a15a9
2048eff
 
 
383fc57
69a15a9
 
e8df9a7
 
 
c2a2252
 
e8df9a7
de72d8a
5063082
 
 
 
 
 
 
 
4a955e8
7ffd1df
5063082
de72d8a
c85decc
e8df9a7
 
e4fddaa
 
d23413d
 
9568566
d23413d
7196f1f
9568566
 
3c7baec
e4fddaa
d23413d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c85decc
c64345b
e8df9a7
 
 
 
c64345b
 
 
 
e8df9a7
094fe7f
 
 
 
 
2048eff
094fe7f
 
 
 
eea11c3
094fe7f
 
 
2048eff
 
e8df9a7
 
 
 
 
 
 
 
 
 
 
 
2048eff
e8df9a7
 
 
 
 
 
4d00fc6
 
bb56de7
a4f19ec
2048eff
3c7baec
6e183d3
e8df9a7
 
 
9568566
38eff04
f13906f
77b171b
5063082
e8df9a7
 
aae8a6c
e8df9a7
4d00fc6
 
3eb966b
b6c1c87
97fca9b
d4994f6
e8df9a7
36a9eec
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import os
import openai
import time
import numpy as np
from numpy import True_
import gradio as gr
from gradio_rich_textbox import RichTextbox
import soundfile as sf
from pydub import AudioSegment

from openai import OpenAI

########## Debug report helper (uncomment to print the Gradio environment)
# import subprocess
# myGradioEnvironment = subprocess.run(['gradio','environment'], stdout=subprocess.PIPE)
# print(myGradioEnvironment.stdout.decode('utf-8'))

# The OpenAI API key is taken from the environment instead of being hard-coded.
OPENAI_SECRET_KEY = os.getenv("OPENAI_SECRET_KEY")
client = OpenAI(api_key=OPENAI_SECRET_KEY)

# Most recent note text; transcribe() rebinds this global when it produces a note.
note_transcript = ""

def transcribe(audio, history_type):
    """Transcribe a recording with Whisper and turn it into a formatted note.

    The recording at ``audio`` is re-encoded to ``Audio_Files/test.mp3`` and
    sent to the ``whisper-1`` model. The transcript is then passed to
    ``gpt-4o`` together with the system prompt read from ``Format_Library``
    for the chosen ``history_type``.

    Args:
        audio: Path of the recorded audio file, or ``None`` when no recording
            was supplied.
        history_type: Label of the note format. Labels that are not in the
            map (including ``None``) fall back to ``McIntyre_Full_Visit.txt``.

    Returns:
        A three-item list ``[note, word_count, mp3_megabytes]``. When the
        audio is missing or unreadable, or the transcription request keeps
        failing with a connection error, ``note`` is an error message and
        both numbers are ``0`` so the result keeps the same shape.

    Side effects:
        Rebinds the module-level ``note_transcript`` and writes
        ``Audio_Files/test.wav`` and ``Audio_Files/test.mp3``.
    """
    global note_transcript
    print(f"Received audio file path: {audio}")

    # Without a recording there is nothing to retry, so bail out before
    # touching the disk or the API.
    if audio is None:
        failure = "No audio was received. Please record something and try again."
        print(failure)
        return [failure, 0, 0]

    history_type_map = {
        "History Simple": "McIntyre_Short_History.txt",
        "History Complex": "McIntyre_Long_History.txt",
        "Full Visit": "McIntyre_Full_Visit.txt",
        "Impression/Plan": "McIntyre_Impression.txt",
        "EMS": "McIntyre_EMS_Handover.txt",
        "Dx/DDx": "McIntyre_Dx_DDx_Format.txt",
        "Feedback": "McIntyre_Feedback.txt",
        "Hallway Consult": "McIntyre_Hallway_Consult_Format.txt",
    }

    # The selected format file is used verbatim as the system prompt.
    file_name = history_type_map.get(history_type, "McIntyre_Full_Visit.txt")
    with open(f"Format_Library/{file_name}", "r") as f:
        role = f.read()
    messages = [{"role": "system", "content": role}]

    ######################## Read the audio file
    max_attempts = 1
    audio_data = None
    samplerate = None
    for attempt in range(1, max_attempts + 1):
        try:
            audio_data, samplerate = sf.read(audio)
            break
        # RuntimeError is included because soundfile's own read errors
        # derive from it.
        except (OSError, TypeError, RuntimeError) as e:
            print(f"Attempt {attempt} of {max_attempts} failed with error: {e}")
            if attempt < max_attempts:
                time.sleep(3)  # only wait when another attempt follows
    else:
        print(f"###############Failed to open audio file after {max_attempts} attempts.##############")
        return [f"Failed to open audio file after {max_attempts} attempts.", 0, 0]

    ################### Convert the recording to .wav and then .mp3
    os.makedirs("Audio_Files", exist_ok=True)
    sf.write("Audio_Files/test.wav", audio_data, samplerate, subtype='PCM_16')
    sound = AudioSegment.from_wav("Audio_Files/test.wav")
    # export() hands back the open output file; close it so the handle is
    # released before the file is rewritten and re-opened below.
    sound.export("Audio_Files/test.mp3", format="mp3").close()

    # NOTE(review): this second write replaces the mp3 that pydub just
    # exported. Kept as in the original; confirm which encoder is intended.
    sf.write("Audio_Files/test.mp3", audio_data, samplerate)

    ################ Send the file to Whisper for transcription
    max_attempts = 3
    audio_transcript = None
    with open("Audio_Files/test.mp3", "rb") as audio_file:
        for attempt in range(1, max_attempts + 1):
            try:
                # A failed upload may have consumed part of the stream.
                audio_file.seek(0)
                audio_transcript = client.audio.transcriptions.create(model="whisper-1", file=audio_file)
                break
            # The v1 SDK (the one providing the OpenAI client class) exposes
            # its exceptions at package level; there is no openai.error module.
            except openai.APIConnectionError as e:
                print(f"Attempt {attempt} failed with error: {e}")
                if attempt < max_attempts:
                    time.sleep(3)  # wait for 3 seconds before retrying

    if audio_transcript is None:
        failure = "Failed to transcribe audio after multiple attempts"
        print(failure)
        return [failure, 0, 0]

    transcript_text = audio_transcript.text
    print(transcript_text)
    messages.append({"role": "user", "content": transcript_text})

    ### Word and MB count
    file_size = os.path.getsize("Audio_Files/test.mp3")
    mp3_megabytes = round(file_size / (1024 * 1024), 2)
    num_words = len(transcript_text.split())

    # Ask OpenAI to create the note; temperature 0 is kept from the original.
    response = client.chat.completions.create(model="gpt-4o", temperature=0, messages=messages)

    note_transcript = response.choices[0].message.content
    print(note_transcript)
    return [note_transcript, num_words, mp3_megabytes]

# Define the Gradio interface.
my_inputs = [
    # Alternatives used with other Gradio versions / input modes:
    #gr.Audio(source="microphone", type="filepath"), #Gradio 3.48.0
    #gr.Audio(sources=["microphone"], type="filepath",format="wav"), #Gradio 4.x
    #gr.Audio(sources=["microphone"],type="numpy",editable="false"), #Gradio 4.x
    gr.Microphone(type="filepath", format="wav"),  #Gradio 4.x
    # Note format selector; the labels are the keys transcribe() looks up.
    gr.Radio(
        ["History Simple", "History Complex", "Full Visit", "Impression/Plan",
         "EMS", "Dx/DDx", "Feedback", "Hallway Consult"],
        show_label=False,
    ),
]

# One output component per value returned by transcribe(), in the same order:
# note text, transcript word count, mp3 size in megabytes. The word-count box
# was missing, which put the word count under the ".mp3 MB" label.
ui = gr.Interface(
    fn=transcribe,
    inputs=my_inputs,
    outputs=[
        #RichTextbox(label="Your Note (gpt-4o)"),
        gr.Textbox(label="Your Note (gpt-4o)", show_copy_button=True),
        gr.Number(label="Number of Words"),
        gr.Number(label=".mp3 MB"),
    ],
    title="Jenkins",
)
# NOTE(review): presumably intended to set the page title; confirm that
# Gradio actually reads a 'template' key from the config.
ui.config['template'] = '<!DOCTYPE html><html><title>Jenkins</title><body>{}</body></html>'

ui.launch(share=False, debug=True)