Spaces:

cloud-sean
/

summerizer

Sleeping

App Files Files Community

cloud-sean committed on Nov 17, 2023

Commit

c73560f

1 Parent(s): 2786061

Create app.py

Browse files

Files changed (1) hide show

app.py +277 -0

app.py ADDED Viewed

	@@ -0,0 +1,277 @@

+import streamlit as st
+import os, uuid, json
+import requests
+import os
+import openai
+import time
+from tempfile import NamedTemporaryFile
+from st_audiorec import st_audiorec
+from azure.identity import DefaultAzureCredential
+from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
+from datetime import datetime
+from pydub import AudioSegment
+# Azure OpenAI resource and deployment that transcribe() posts audio to.
+AOAI_ENDPOINT = "https://whisper-aoai-sean.openai.azure.com"
+WHISPER_DEPLOYMENT_NAME = "whisper"
+AOAI_KEY = os.environ.get("AOAI_KEY")
+WHISPER_PROMPT = "The following is a conversation between a doctor and a patient."
+# System prompt used by summerize_with_gpt().
+AOAI_PROMPT_DOCTOR = "I am a doctor. create a summary of this patient encounter for me. respond in the same language as the text was given in."
+# NOTE(review): not referenced anywhere in the visible code.
+AOAI_PROMPT_STANDARD = "Summerize this text. Call out key points. Return in markdown format."
+# Credentials for Blob Storage and the Speech batch-transcription API are read from the environment.
+AZURE_BLOB_CONNECTION_STRING = os.environ.get("AZURE_BLOB_CONNECTION_STRING")
+TRANSCRIPTION_API_KEY = os.environ.get("TRANSCRIPTION_API_KEY")
+# Audio captured during the current script run; assigned by the recorder/uploader widgets.
+wav_audio_data = None
+# Module-level configuration of the openai client used for chat completions.
+openai.api_type = "azure"
+openai.api_base = aoai_endpoint = "https://eastus-openai-sean.openai.azure.com/"
+# SECURITY: this key used to be committed as a string literal. It is now read from the
+# AOAI_CHAT_KEY environment variable like the other secrets; the key that was published
+# in the repository history should be rotated.
+openai.api_key = aoai_key = os.environ.get("AOAI_CHAT_KEY")
+openai.api_version = "2023-07-01-preview"
+# Seed every session-state key the app reads, without overwriting values that
+# already exist from an earlier rerun of the script.
+_SESSION_DEFAULTS = {
+    "hebrew_mode": '',
+    "summary": '',
+    "request_status": "Pending",
+    "transcription": '',
+    "recording": 'na',
+    "clicked": False,
+    "raw_transcription": '',
+}
+for _state_key, _state_default in _SESSION_DEFAULTS.items():
+    if _state_key not in st.session_state:
+        st.session_state[_state_key] = _state_default
+def click_button():
+    """Button callback: record in session state that the action button was pressed."""
+    st.session_state.clicked = True
+def create_transcription_request(blob_url, display_name=None):
+    """Create a Speech batch-transcription job for the audio at ``blob_url``.
+
+    Args:
+        blob_url: URL of the uploaded audio, sent as the job's only content URL.
+        display_name: Label for the job. Defaults to the current local time
+            formatted as ``YYYYmmdd_HHMMSS`` (previously a fixed, stale timestamp).
+
+    Returns:
+        The job's ``self`` URL (a string) when the service answers 201, otherwise
+        ``None`` after an error has been shown in the UI.
+    """
+    url = "https://eastus.api.cognitive.microsoft.com/speechtotext/v3.2-preview.1/transcriptions"
+    # The locale follows the Hebrew toggle stored in session state.
+    locale = "he-il" if st.session_state.hebrew_mode else "en-us"
+    if display_name is None:
+        display_name = datetime.now().strftime("%Y%m%d_%H%M%S")
+    payload = {
+        "displayName": display_name,
+        "description": "Speech Studio Batch speech to text",
+        "locale": locale,
+        "contentUrls": [blob_url],
+        "model": {
+            "self": "https://eastus.api.cognitive.microsoft.com/speechtotext/v3.2-preview.1/models/base/e830341e-8f47-4e0a-b64c-3f66167b751c"
+        },
+        "properties": {
+            "wordLevelTimestampsEnabled": False,
+            "displayFormWordLevelTimestampsEnabled": False,
+            "diarizationEnabled": True,
+            "diarization": {
+                "speakers": {
+                    "minCount": 1,
+                    "maxCount": 2,
+                },
+            },
+            "punctuationMode": "DictatedAndAutomatic",
+            "profanityFilterMode": "Masked",
+        },
+        "customProperties": {},
+    }
+    headers = {
+        'Ocp-Apim-Subscription-Key': TRANSCRIPTION_API_KEY,
+        'Content-Type': 'application/json',
+    }
+    try:
+        # A timeout keeps a stalled connection from hanging the Streamlit run forever.
+        response = requests.post(url, headers=headers, json=payload, timeout=30)
+    except requests.RequestException:
+        response = None
+    if response is None or response.status_code != 201:
+        st.error("Error creating transcription request")
+        # Return None rather than the st.error element so callers can tell
+        # a failure apart from a job URL.
+        return None
+    return response.json()["self"]
+def attempt_to_get_transcription(transcription_url):
+    """Fetch a transcription job and return the value of its ``status`` field.
+
+    Args:
+        transcription_url: The job URL returned by create_transcription_request().
+
+    Returns:
+        The ``status`` string from the job's JSON document.
+
+    Raises:
+        requests.HTTPError: If the service answers with an error status, so the
+            failure is reported as such instead of a KeyError on ``"status"``.
+        requests.RequestException: On connection problems or timeout.
+    """
+    headers = {
+        'Ocp-Apim-Subscription-Key': TRANSCRIPTION_API_KEY,
+        'Content-Type': 'application/json',
+    }
+    # This is called from a polling loop; the timeout keeps one stalled request
+    # from blocking the loop indefinitely.
+    response = requests.get(transcription_url, headers=headers, timeout=30)
+    response.raise_for_status()
+    return response.json()["status"]
+def extract_conversation(json_data):
+    """Convert a transcription result document into a speaker-labelled transcript.
+
+    Args:
+        json_data: JSON text containing a ``recognizedPhrases`` list. Each phrase
+            may carry ``speaker``, ``offsetInTicks`` and an ``nBest`` list whose
+            entries have a ``display`` string.
+
+    Returns:
+        One ``"Person <speaker>: <text> \n"`` entry per phrase, ordered by
+        ``offsetInTicks`` and joined with newlines. Returns an empty string
+        when there are no phrases.
+    """
+    data = json.loads(json_data)
+    # sorted() orders the phrases chronologically without mutating the parsed document.
+    recognized_phrases = sorted(
+        data.get("recognizedPhrases", []),
+        key=lambda phrase: phrase.get("offsetInTicks", 0),
+    )
+    conversation = []
+    for phrase in recognized_phrases:
+        candidates = phrase.get("nBest") or []
+        if not candidates:
+            # A phrase without any candidate has nothing to show; skip it
+            # instead of raising KeyError/IndexError.
+            continue
+        # Only the first nBest entry is used (the original author treats it as
+        # the most confident candidate).
+        text = candidates[0].get('display', '')
+        conversation.append(f"Person {phrase.get('speaker')}: {text} \n")
+    return '\n'.join(conversation)
+def get_final_transcription(transcription_url):
+    """Download the result of a finished transcription job and format it.
+
+    Lists the job's files at ``<transcription_url>/files``, picks the first entry
+    whose ``kind`` is ``"Transcription"``, downloads its ``links.contentUrl`` and
+    passes the body to extract_conversation().
+
+    Args:
+        transcription_url: The job URL returned by create_transcription_request().
+
+    Returns:
+        The formatted conversation string produced by extract_conversation().
+
+    Raises:
+        RuntimeError: If the file listing contains no ``"Transcription"`` entry.
+        requests.HTTPError: If either request returns an error status.
+    """
+    headers = {
+        'Ocp-Apim-Subscription-Key': TRANSCRIPTION_API_KEY,
+        'Content-Type': 'application/json',
+    }
+    listing = requests.get(f"{transcription_url}/files", headers=headers, timeout=30)
+    listing.raise_for_status()
+    content_url = next(
+        (
+            item["links"]["contentUrl"]
+            for item in listing.json()["values"]
+            if item["kind"] == "Transcription"
+        ),
+        None,
+    )
+    if content_url is None:
+        # Previously the whole file list was passed to requests.get() in this case.
+        raise RuntimeError("No transcription result file found for this job")
+    result = requests.get(content_url, headers=headers, timeout=30)
+    result.raise_for_status()
+    return extract_conversation(result.text)
+def upload_audio(audio_bytes):
+    """Convert WAV audio to mono and upload it to the ``audiofiles`` blob container.
+
+    Args:
+        audio_bytes: Raw ``bytes``/``bytearray``, or an object exposing
+            ``getbuffer()`` (the value produced by the file uploader widget).
+
+    Returns:
+        The uploaded blob's URL (a string), or ``None`` after an error has been
+        shown in the UI when the upload fails.
+    """
+    # The blob is named after the current local time, e.g. 20231117_093000.wav.
+    filename = datetime.now().strftime("%Y%m%d_%H%M%S") + ".wav"
+    # pydub reads from a path, so the audio is first written to a temporary file.
+    with NamedTemporaryFile(delete=False) as f:
+        if isinstance(audio_bytes, (bytes, bytearray)):
+            f.write(audio_bytes)
+        else:
+            f.write(audio_bytes.getbuffer())
+        temp_filename = f.name
+    mono_filename = f"{temp_filename}.wav"
+    try:
+        sound = AudioSegment.from_wav(temp_filename).set_channels(1)
+        # export() hands back the open output file; close it so the data is
+        # flushed and the handle is not leaked.
+        sound.export(mono_filename, format="wav").close()
+        blob_service_client = BlobServiceClient.from_connection_string(AZURE_BLOB_CONNECTION_STRING)
+        blob_client = blob_service_client.get_blob_client(container="audiofiles", blob=filename)
+        try:
+            with open(mono_filename, "rb") as data:
+                blob_client.upload_blob(data)
+        except Exception:
+            # Upload problems are reported in the UI rather than crashing the app.
+            st.error("Error uploading to Azure Blob Storage")
+            return None
+        return blob_client.url
+    finally:
+        # Both temporary files were created with delete=False semantics; remove
+        # them so every upload does not leave audio behind on disk.
+        for path in (temp_filename, mono_filename):
+            if os.path.exists(path):
+                os.remove(path)
+def summerize_with_gpt(text, additional="Standard"):
+    """Ask the ``gpt-4-32k`` chat deployment for a summary of ``text``.
+
+    The system message is AOAI_PROMPT_DOCTOR followed by ``additional``; the
+    user message is ``text``. Returns the content of the first choice.
+    """
+    system_message = {"role":"system","content": f"{AOAI_PROMPT_DOCTOR} \n {additional}"}
+    user_message = {"role":"user","content":text}
+    completion = openai.ChatCompletion.create(
+        engine="gpt-4-32k",
+        messages=[system_message, user_message],
+        temperature=0.2,
+        max_tokens=1200,
+        top_p=0.95,
+        frequency_penalty=0,
+        presence_penalty=0,
+        stop=None,
+    )
+    first_choice = completion.choices[0]
+    return first_choice.message.content
+def transcribe(audio_bytes):
+    """Send audio to the Azure OpenAI Whisper deployment and return the JSON reply.
+
+    Args:
+        audio_bytes: The audio content, uploaded as ``Recording.wav``.
+
+    Returns:
+        The decoded JSON body of the service response.
+    """
+    url = f"{AOAI_ENDPOINT}/openai/deployments/{WHISPER_DEPLOYMENT_NAME}/audio/transcriptions"
+    files = [
+        ('file', ('Recording.wav', audio_bytes, 'application/octet-stream'))
+    ]
+    response = requests.post(
+        url,
+        # Let requests encode the query string instead of formatting it by hand.
+        params={"api-version": "2023-09-01-preview"},
+        # The key travels in the api-key header so it does not end up in URLs
+        # that proxies and logs may record.
+        headers={"api-key": AOAI_KEY},
+        # The REST reference lists prompt as a multipart form field, not a
+        # query parameter.
+        data={"prompt": WHISPER_PROMPT},
+        files=files,
+        timeout=120,
+    )
+    return response.json()
+# ---- Page layout: title, language toggle and the input selector ----
+st.title("Summerizer 🧬")
+# The toggle value is stored in session state so the helper functions can read it.
+st.session_state.hebrew_mode = st.toggle("Hebrew", False)
+select_container = st.empty()
+text_box = st.empty()
+# State for the transcription polling loop further down.
+request_completed = False
+tmp = ""
+html_right = "<div style='text-align: right;'>"
+# These are only produced by the "Text" option. Define them up front so the
+# click handler can test them in every mode without raising NameError.
+text_data = None
+summary_types = ""
+with select_container.container():
+    select = st.selectbox("Upload or Record", ("Upload", "Record", "Text"))
+    if select == "Record":
+        wav_audio_data = st_audiorec()
+    elif select == "Upload":
+        wav_audio_data = st.file_uploader("Upload Audio", type=["wav"])
+    elif select == "Text":
+        text_data = st.text_area("Enter Text")
+        summary_types = st.text_input("Enter Summary Type etc. (Standard, Bullet, or Paragraph)")
+    # click_button() sets st.session_state.clicked, which triggers processing below.
+    done_speech_button = st.button("Upload", on_click=click_button)
+def _show_text(text, allow_html):
+    """Render ``text`` with st.markdown, right-aligned in a div when Hebrew mode is on."""
+    if st.session_state.hebrew_mode:
+        st.markdown(f"<div style='text-align: right;'> {text} </div>",unsafe_allow_html=True)
+    else:
+        st.markdown(f"{text}",unsafe_allow_html=allow_html)
+def _wait_for_transcription(transcription_url):
+    """Poll the job once per second until it reports Succeeded or Failed; return that status."""
+    last_status = None
+    while True:
+        status = attempt_to_get_transcription(transcription_url)
+        if status != last_status:
+            # Only report a status when it changes, to avoid flooding the UI.
+            st.write(f"Transcription Status: {status}")
+            last_status = status
+        if status in ("Succeeded", "Failed"):
+            # "Failed" must end the loop too, otherwise a failed job polls forever.
+            return status
+        time.sleep(1)
+def _transcribe_uploaded_audio(audio):
+    """Upload ``audio``, run a batch transcription on it and return the transcript, or None on failure."""
+    with st.spinner("Uploading to Azure Blob storage..."):
+        blob_url = upload_audio(audio)
+    if not isinstance(blob_url, str):
+        # upload_audio() has already shown its own error message.
+        return None
+    st.toast("Successfully Uploaded!",icon="✅")
+    raw_transcription = None
+    with st.status("Using Azure Speech with OpenAI's Whisper to transcribe..."):
+        transcription_request = create_transcription_request(blob_url)
+        # Anything other than a URL string means the request was not created.
+        if isinstance(transcription_request, str):
+            time.sleep(1)
+            st.write("Transcription Request Created!")
+            st.toast("Successfully Created Transcription Request!",icon="✅")
+            if _wait_for_transcription(transcription_request) == "Succeeded":
+                st.write("Transcription Complete!")
+                st.toast("Successfully Transcribed!",icon="✅")
+                st.write("Grabbing Transcription...")
+                time.sleep(1)
+                raw_transcription = get_final_transcription(transcription_url=transcription_request)
+                st.write("Successfully Grabbed Transcription!")
+    if raw_transcription is None:
+        # Shown outside the status box so it is visible while the box is collapsed.
+        st.error("Transcription failed")
+    return raw_transcription
+def _summarize_and_show(text, additional="Standard"):
+    """Summarise ``text``, store the result in session state and display it."""
+    with st.status("Using GPT-4 to summerize..."):
+        st.write("Starting up the GPUs!")
+        st.session_state.summary = summerize_with_gpt(text, additional)
+        st.write("Successfully Summerized!")
+        st.toast("Successfully Summerized!",icon="✅")
+    with st.expander("Summary", False):
+        _show_text(st.session_state.summary, allow_html=True)
+# ---- Handle a press of the action button ----
+if st.session_state.clicked:
+    # Consume the click first so a later rerun does not repeat the same work.
+    st.session_state.clicked = False
+    if wav_audio_data is not None:
+        raw_transcription = _transcribe_uploaded_audio(wav_audio_data)
+        if raw_transcription is not None:
+            # Keep the transcript text (not a UI element) under the key that
+            # is initialised at the top of the script.
+            st.session_state.raw_transcription = raw_transcription
+            with st.expander("Transcription", False):
+                _show_text(raw_transcription, allow_html=False)
+            _summarize_and_show(raw_transcription)
+    # text_data and summary_types are only set in "Text" mode; checking the
+    # mode first avoids touching them in the other modes.
+    elif select == "Text" and text_data:
+        _summarize_and_show(text_data, summary_types)
+    elif select == "Text":
+        st.error("Please enter text to summarize")
+    else:
+        st.error("Please upload or record audio")