Spaces:

kimadams
/

ai-kit

Sleeping

App Files Files Community

Kim Adams committed on Sep 6, 2023

Commit

18d13eb

1 Parent(s): f8b0a1a

adding audio analyzer

Browse files

Files changed (23) hide show

ai_voice/__pycache__/voice_handling.cpython-311.pyc +0 -0
ai_voice/voice_handling.py +6 -15
app.py +9 -7
gradio_cached_examples/59/log.csv +2 -0
gradio_cached_examples/59/output/a1a7857f4e370d5443f91a761c59e83e5a91ba66/video_sample.mp4 +0 -0
image_gen/__pycache__/ui_image_generation.cpython-311.pyc +0 -0
image_gen/{image_creation.py → ui_image_generation.py} +0 -0
prompts/__pycache__/ui_prompt_builder.cpython-311.pyc +0 -0
prompts/{prompt_builder.py → ui_prompt_builder.py} +0 -0
recording_analysis/__pycache__/recording_analysis.cpython-311.pyc +0 -0
recording_analysis/__pycache__/ui_recording_analysis.cpython-311.pyc +0 -0
recording_analysis/recording_analysis.py +98 -0
recording_analysis/transcripts/Johns_Voice_20230728_124603_Meeting_Recording.txt +1 -0
recording_analysis/ui_recording_analysis.py +60 -0
requirements.txt +2 -1
summarization/__pycache__/summarization.cpython-311.pyc +0 -0
summarization/__pycache__/ui_summarize.cpython-311.pyc +0 -0
summarization/summarization.py +1 -1
summarization/{summarize.py → ui_summarize.py} +0 -0
utilities/__pycache__/clean_text.cpython-311.pyc +0 -0
utilities/__pycache__/constants.cpython-311.pyc +0 -0
utilities/clean_text.py +2 -1
utilities/constants.py +23 -1

ai_voice/__pycache__/voice_handling.cpython-311.pyc CHANGED Viewed

Binary files a/ai_voice/__pycache__/voice_handling.cpython-311.pyc and b/ai_voice/__pycache__/voice_handling.cpython-311.pyc differ

ai_voice/voice_handling.py CHANGED Viewed

@@ -21,7 +21,6 @@ voice_id = voice_id1
 def SetVoiceId(newVoice):
     global voice_id
-    print("SetVoiceId: voice_id: "+voice_id + " newVoice "+newVoice)
     voice_id = GetVoiceId(newVoice)
 def UpdateDF():
@@ -29,23 +28,13 @@ def UpdateDF():
     dataframe.value=pd.DataFrame({"role": [""], "content": [""] })
 def PrepareForVoice(text):
-    print("prepped_text before: "+text)
     p = inflect.engine()
     prepped_text = text.replace('"', '').replace('401k', '4 oh 1 k').replace('slalom', "slallum").replace('Slalom', "slallum").replace('IT 101', "IT 1 oh 1")
     prepped_text = re.sub(r'(\d+)m', lambda m: p.number_to_words(int(m.group(1)) * 1000000), prepped_text, flags=re.IGNORECASE)
     prepped_text = re.sub(r'(\d+)k', lambda m: p.number_to_words(int(m.group(1)) * 1000), prepped_text, flags=re.IGNORECASE)
     prepped_text = re.sub(r'\$(\d+(?:,\d{3})*(?:\.\d{2})?)', lambda m: p.number_to_words(int(m.group(1).replace(',', '')) if '.' not in m.group(1) else float(m.group(1).replace(',', ''))) + ' dollars', prepped_text, flags=re.IGNORECASE)
-    print("prepped_text: after "+prepped_text)
     return clean_text.ReplaceNumbersWithWords(prepped_text)  # Change to use ReplaceNumbersWithWords directly
-'''def PrepareForVoice(text):
-    prepped_text = text.replace('"', '').replace(',', '').replace('401k', '4 oh 1 k').replace('10k', 'ten thousand').replace('slalom', "slallum").replace('Slalom', "slallum").replace('IT 101', "IT 1 oh 1")
-    print("prepped_text before: "+prepped_text)
-    prepped_text = re.sub(r'\$(\d+)', r'\1 dollars', prepped_text)
-    print("prepped_text: "+prepped_text)
-    return clean_text.ReplaceNumbersWithWords(prepped_text)'''
 def GetVoiceId(voice):
     if voice==constants.VOICE_2:
         return voice_id2
@@ -89,19 +78,21 @@ def TranslateAudio(audio):
     audio_file= open(audio_filename_with_extension, "rb")
     return openai.Audio.transcribe("whisper-1", audio_file)
-def ProcessGPTResponse(messages):
-    response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)
     cleaned_text= clean_text.RemoveRole(response["choices"][0]["message"]["content"])
     return cleaned_text
 def TranslateAndProcessAudio(audio, prompt, eval_sentiment, eval_emotion, messages, persona):
     transcript = TranslateAudio(audio)
-    print("transcript: "+transcript['text'] + " messages: "+str(messages) + " persona: "+persona)
     best_answer= embeddings.ApplyEmbeddings(transcript['text'], persona)
     optionals = user_prompts.ApplyOptionals(prompt, eval_sentiment, eval_emotion)
     user_text = f"Using the following text, answer the question '{transcript['text']}'. {optionals} {best_answer}"
     messages.append({"role": "user", "content": user_text})
-    system_message = ProcessGPTResponse( messages)
     messages.append({"role": "assistant", "content": system_message})
     processedAudio=PrepareForVoice(system_message)
     audio_html=ProcessAudio(processedAudio)

 def SetVoiceId(newVoice):
     global voice_id
     voice_id = GetVoiceId(newVoice)
 def UpdateDF():
     dataframe.value=pd.DataFrame({"role": [""], "content": [""] })
 def PrepareForVoice(text):
     p = inflect.engine()
     prepped_text = text.replace('"', '').replace('401k', '4 oh 1 k').replace('slalom', "slallum").replace('Slalom', "slallum").replace('IT 101', "IT 1 oh 1")
     prepped_text = re.sub(r'(\d+)m', lambda m: p.number_to_words(int(m.group(1)) * 1000000), prepped_text, flags=re.IGNORECASE)
     prepped_text = re.sub(r'(\d+)k', lambda m: p.number_to_words(int(m.group(1)) * 1000), prepped_text, flags=re.IGNORECASE)
     prepped_text = re.sub(r'\$(\d+(?:,\d{3})*(?:\.\d{2})?)', lambda m: p.number_to_words(int(m.group(1).replace(',', '')) if '.' not in m.group(1) else float(m.group(1).replace(',', ''))) + ' dollars', prepped_text, flags=re.IGNORECASE)
     return clean_text.ReplaceNumbersWithWords(prepped_text)  # Change to use ReplaceNumbersWithWords directly
 def GetVoiceId(voice):
     if voice==constants.VOICE_2:
         return voice_id2
     audio_file= open(audio_filename_with_extension, "rb")
     return openai.Audio.transcribe("whisper-1", audio_file)
+def Completion(messages):
+    response = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=messages
+    )
     cleaned_text= clean_text.RemoveRole(response["choices"][0]["message"]["content"])
     return cleaned_text
 def TranslateAndProcessAudio(audio, prompt, eval_sentiment, eval_emotion, messages, persona):
     transcript = TranslateAudio(audio)
     best_answer= embeddings.ApplyEmbeddings(transcript['text'], persona)
     optionals = user_prompts.ApplyOptionals(prompt, eval_sentiment, eval_emotion)
     user_text = f"Using the following text, answer the question '{transcript['text']}'. {optionals} {best_answer}"
     messages.append({"role": "user", "content": user_text})
+    system_message = Completion( messages)
     messages.append({"role": "assistant", "content": system_message})
     processedAudio=PrepareForVoice(system_message)
     audio_html=ProcessAudio(processedAudio)

app.py CHANGED Viewed

@@ -1,17 +1,19 @@
 import gradio as gr
 import pandas as pd
 import openai
-from image_gen import image_creation
-from summarization import summarize
 from utilities import constants,api_keys
 from ui.app_theme import SoftBlue
-from prompts import prompt_builder
 openai.api_key = api_keys.APIKeys().get_key('OPENAI_API_KEY')
-ui1=prompt_builder.ui
-ui2=summarize.ui
-ui3=image_creation.ui
-ui = gr.TabbedInterface([ui1,ui2,ui3], (constants.UI_1, constants.UI_2, constants.UI_3), theme=SoftBlue())
 ui.launch()

 import gradio as gr
 import pandas as pd
 import openai
+from image_gen import ui_image_generation
+from summarization import ui_summarize
 from utilities import constants,api_keys
+from recording_analysis import ui_recording_analysis
 from ui.app_theme import SoftBlue
+from prompts import ui_prompt_builder
 openai.api_key = api_keys.APIKeys().get_key('OPENAI_API_KEY')
+ui1=ui_prompt_builder.ui
+ui2=ui_summarize.ui
+ui3=ui_image_generation.ui
+ui4=ui_recording_analysis.ui
+ui = gr.TabbedInterface([ui1,ui2,ui3,ui4], (constants.UI_1, constants.UI_2, constants.UI_3,constants.UI_4), theme=SoftBlue())
 ui.launch()

gradio_cached_examples/59/log.csv ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ output,flag,username,timestamp
2	+ /Users/kimberlyadams/Desktop/Python/AIToolkit-HF/ai-kit/gradio_cached_examples/59/output/a1a7857f4e370d5443f91a761c59e83e5a91ba66/video_sample.mp4,,,2023-09-02 09:08:22.796516

gradio_cached_examples/59/output/a1a7857f4e370d5443f91a761c59e83e5a91ba66/video_sample.mp4 ADDED Viewed

Binary file (261 kB). View file

image_gen/__pycache__/ui_image_generation.cpython-311.pyc ADDED Viewed

Binary file (5.78 kB). View file

image_gen/{image_creation.py → ui_image_generation.py} RENAMED Viewed

File without changes

prompts/__pycache__/ui_prompt_builder.cpython-311.pyc ADDED Viewed

Binary file (7.82 kB). View file

prompts/{prompt_builder.py → ui_prompt_builder.py} RENAMED Viewed

File without changes

recording_analysis/__pycache__/recording_analysis.cpython-311.pyc ADDED Viewed

Binary file (7.44 kB). View file

recording_analysis/__pycache__/ui_recording_analysis.cpython-311.pyc ADDED Viewed

Binary file (5.61 kB). View file

recording_analysis/recording_analysis.py ADDED Viewed

	@@ -0,0 +1,98 @@

+import inflect, re, requests, json, os, openai
+import pandas as pd
+from utilities import constants, api_keys
+from moviepy.editor import VideoFileClip
+openai.api_key = api_keys.APIKeys().get_key('OPENAI_API_KEY')
+key_words=set()
def CompletionEngine(sys_message, user_message, num_tokens, num_results, temperature, topic_model, top_p):
    """Send one system/user prompt pair to ``openai.ChatCompletion.create``.

    Args:
        sys_message: Text sent as the ``system`` message.
        user_message: Text sent as the ``user`` message.
        num_tokens: Passed through as ``max_tokens``.
        num_results: Passed through as ``n``.
        temperature: Passed through as ``temperature``.
        topic_model: Passed through as ``model``.
        top_p: Passed through as ``top_p``.

    Returns:
        Whatever ``openai.ChatCompletion.create`` returns, unmodified.
    """
    # Both prompts are echoed to stdout before the request goes out.
    print("CompletionEngine sys_message: ", sys_message)
    print("user_message: ", user_message)

    chat_messages = [
        {"role": "system", "content": sys_message},
        {"role": "user", "content": user_message},
    ]
    sampling_options = {
        "max_tokens": num_tokens,
        "n": num_results,
        "temperature": temperature,
        "stop": None,
        "top_p": top_p,
    }
    return openai.ChatCompletion.create(
        model=topic_model,
        messages=chat_messages,
        **sampling_options,
    )
def WriteKeyWords():
    """Write the module-level ``key_words`` set to ``constants.ANALYSIS_PATH`` as JSON."""
    # A set is not JSON-serializable, so it is copied into a list first.
    serializable_words = list(key_words)
    # NOTE(review): constants.ANALYSIS_PATH is "recording_analysis/data/",
    # which looks like a directory rather than a file name - confirm the
    # intended target before relying on this function.
    with open(constants.ANALYSIS_PATH, "w") as json_file:
        json.dump(serializable_words, json_file, indent=4)
def CleanFileName(input_file):
    """Return the file's base name without its last extension, made identifier-like.

    The directory part and the final extension are dropped, then every space,
    dot and hyphen left in the name is replaced by an underscore.
    """
    stem = os.path.splitext(os.path.basename(input_file))[0]
    return re.sub(r'[ .-]', '_', stem)
def CleanText(message):
    """Strip emoji shortcode colons and remove URLs from *message*.

    Args:
        message: Raw text, e.g. a transcript.

    Returns:
        The text with ``:name:`` shortcodes reduced to ``name`` and with
        ``http://``/``https://`` URLs and ``www.`` addresses removed.
    """
    # ":thumbsup:" -> "thumbsup"
    cleaned_message = re.sub(r':(\w+):', r'\1', message)
    # Require a real scheme ("http://" / "https://") or a literal "www." so
    # that ordinary words such as "httpd" or "wwwhat" are not deleted.
    cleaned_message = re.sub(r'https?://\S+|www\.\S+', '', cleaned_message)
    return cleaned_message
def ProcessTranscript(transcript,messages):
    """Summarize *transcript* with the analysis prompt and log the exchange.

    The transcript is passed through ``CleanText`` first. ``messages`` is
    extended in place: the analysis system prompt is appended before the
    request and the returned summary is appended as the assistant entry.

    Returns:
        ``(summary, messages)`` where ``summary`` is the content of the first
        completion choice converted with ``str``.
    """
    cleaned_transcript = CleanText(transcript)
    system_prompt = constants.ANALYSIS_SYSTEM_MESSAGE
    messages.append({"role": "system", "content": system_prompt})

    completion = CompletionEngine(
        sys_message=system_prompt,
        user_message=cleaned_transcript,
        num_tokens=constants.SUMMARY_TOKENS,
        num_results=constants.NUM_RESULTS,
        temperature=constants.TEMP,
        topic_model=constants.ANALYSIS_MODEL,
        top_p=constants.TOP_P,
    )
    print("before options, completion:")
    print(completion)

    # Only the first choice is used.
    summary = completion.choices[0]['message']['content']
    messages.append({"role": "assistant", "content": summary})
    print("---summary: ")
    print(summary)
    return str(summary), messages
def FindTopics(transcript, messages):
    """Ask the model for keywords describing *transcript* and log the exchange.

    ``messages`` is extended in place with the keyword system prompt and then
    with the model's reply as the assistant entry.

    Returns:
        ``(topics, messages)`` where ``topics`` is the content of the first
        completion choice.
    """
    system_prompt = constants.KEYWORD_SYSTEM_MESSAGE
    messages.append({"role": "system", "content": system_prompt})

    topic_completion = CompletionEngine(
        sys_message=system_prompt,
        user_message=transcript,
        num_tokens=constants.KEYWORD_TOKENS,
        num_results=constants.NUM_RESULTS,
        temperature=constants.TEMP,
        topic_model=constants.ANALYSIS_MODEL,
        top_p=constants.TOP_P,
    )
    # Only the first choice is used.
    topics = topic_completion.choices[0]['message']['content']
    messages.append({"role": "assistant", "content": topics})
    return topics, messages
def _FailureResult(message):
    """Build the 3-tuple returned when a recording cannot be analysed."""
    empty_log = pd.DataFrame({"role": [""], "content": [""]})
    return message, "", empty_log


def _TranscribeVideo(input_file, file_name):
    """Extract the audio track to a .wav file and return its Whisper transcript text.

    Returns ``None`` when the video has no audio track.
    """
    video = VideoFileClip(input_file)
    try:
        audio = video.audio
        if audio is None:
            return None
        os.makedirs(constants.ORIGINALS_PATH, exist_ok=True)
        audio_file = constants.ORIGINALS_PATH + "audio_" + file_name + '.wav'
        print(audio_file)
        audio.write_audiofile(audio_file)
    finally:
        # Release the reader processes/handles held by the clip.
        video.close()
    with open(audio_file, 'rb') as audio_stream:
        response = openai.Audio.transcribe("whisper-1", audio_stream)
    return response['text']


def StripAndTranslateAudio(input_file):
    """Transcribe a video's audio, summarize it and extract its topics.

    A transcript cached under ``constants.TRANSCRIPT_PATH`` is reused when
    present; otherwise the audio track is extracted and sent to Whisper, and
    the resulting text is cached for next time.

    Args:
        input_file: Path to a .mp4, .mov, .avi or .mkv file (extension match
            is case-insensitive).

    Returns:
        ``(summary, topics, df)``: the summary text from ``ProcessTranscript``,
        the topics text from ``FindTopics`` (computed from the summary), and a
        DataFrame of the logged messages. When the file is missing, has an
        unsupported extension, or has no audio track, the first element is an
        explanatory message, the second is ``""`` and the third is a blank
        one-row role/content frame, so callers can always unpack three values.
    """
    supported_extensions = (".mp4", ".mov", ".avi", ".mkv")

    # Validate before touching the file with moviepy.
    if not input_file or not os.path.exists(input_file):
        return _FailureResult(f"File {input_file} does not exist.")
    print(f"File {input_file} exists.")
    if not input_file.lower().endswith(supported_extensions):
        return _FailureResult("File should be .mp4, .mov, .avi, or .mkv format.")

    file_name = CleanFileName(input_file)
    transcript_path = constants.TRANSCRIPT_PATH + file_name + ".txt"
    messages = [{"role": "system", "content": "audio file/whisper-1"}]

    if os.path.exists(transcript_path):
        # Already transcribed: reuse the cached text.
        with open(transcript_path, "r") as transcript_file:
            transcript = transcript_file.read()
    else:
        transcript = _TranscribeVideo(input_file, file_name)
        if transcript is None:
            return _FailureResult(f"File {input_file} has no audio track.")
        os.makedirs(constants.TRANSCRIPT_PATH, exist_ok=True)
        with open(transcript_path, "w") as transcript_file:
            transcript_file.write(transcript)
    # Log the transcript text itself in both branches.
    messages.append({"role": "transcribe", "content": transcript})

    summary, messages = ProcessTranscript(transcript, messages)
    # Topics are extracted from the summary, matching the original chaining.
    topics, messages = FindTopics(summary, messages)
    df = pd.DataFrame(messages)
    return summary, topics, df

recording_analysis/transcripts/Johns_Voice_20230728_124603_Meeting_Recording.txt ADDED Viewed

	@@ -0,0 +1 @@

+ At Slalom Build, we are motivated by an unwavering passion for technology and the transformative power it holds. We are the makers, the planners, the creatives, and the coders who are driven to build a world of true change. Our journey begins with a clear vision, a world made better through technology's boundless potential. Fearless and determined, we eagerly explore every opportunity. In perfect sync with our clients, we fearlessly tackle today's challenges. We focus on the present, plot the next steps, and rapidly co-create cutting-edge products and experiences that will shape a future of impact. We are motivated by the opportunity to build the future we envision, and we invite anyone with the same drive and determination to join us on this extraordinary quest. Slalom Build, where innovation takes action. Let's build a world of endless possibilities together. Are you ready? How's that?

recording_analysis/ui_recording_analysis.py ADDED Viewed

	@@ -0,0 +1,60 @@

+import gradio as gr
+import pandas as pd
+import os, re
+from utilities import constants
+from recording_analysis import recording_analysis
def InitDF():
    """Rebind the module-level ``analysisDF`` to a blank one-row role/content frame."""
    global analysisDF
    blank_row = {"role": [""], "content": [""]}
    analysisDF = pd.DataFrame(blank_row)
def VideoIdentity(video):
    """Analyse *video* and return the values for the summary, topics and log outputs.

    Delegates to ``recording_analysis.StripAndTranslateAudio`` and converts the
    topics text it returns into HTML with ``RenderBoxes``.
    """
    summary_text, topic_text, log_df = recording_analysis.StripAndTranslateAudio(video)
    return summary_text, RenderBoxes(topic_text), log_df
def RenderBoxes(strings_list):
    """Render a newline-separated topic list as a row of styled HTML boxes.

    Args:
        strings_list: Topics as one string, one topic per line. A leading
            "N. " list number on a line is removed. Blank lines are skipped.

    Returns:
        One inline-block ``<div>`` per topic, concatenated; ``""`` when there
        are no topics.
    """
    import html  # local import so this function is self-contained

    print("strings_list: ", strings_list)
    if not strings_list:
        return ""
    # Strip each line before removing its list number so that indented items
    # ("  2. Foo") are cleaned the same way as unindented ones.
    clean_list = [
        re.sub(r'^\d+\.\s*', '', topic.strip())
        for topic in strings_list.split('\n')
        if topic.strip()
    ]
    print("clean_list: ", clean_list)
    box_style = (
        "background-color: #107276; color: #ffffff; display: inline-block; "
        "font-size: 13pt; margin: 5px; padding: 10px;"
    )
    # The topics are model output: escape them so stray markup is shown as
    # text instead of being injected into the page.
    return "".join(
        f'<div style="{box_style}">{html.escape(topic)}</div>'
        for topic in clean_list
    )
def Unload():
    """Return blank values for the topics, summary and log outputs.

    Returns:
        ``("", "", df)`` where ``df`` is a one-row role/content frame of empty
        strings, in the order the Clear button's outputs are wired.
    """
    # The module-level component handles are deliberately left alone:
    # rebinding ``videoBlock`` to None here would not clear the widget, it
    # would only discard the module's reference to it.
    print("Unload- ")
    return "", "", pd.DataFrame({"role": [""], "content": [""]})
# Layout and event wiring for the Recording Analysis tab.
with gr.Blocks() as ui:
    # Tab heading.
    label2 = gr.Label(show_label=False, value=constants.RECORDING_ANALYSIS, container=False)

    # Intro row: purpose in the first column, directions in the second.
    with gr.Row():
        with gr.Column():
            gr.Markdown(constants.PURPOSE_MD)
            gr.Markdown(constants.RECORDING_ANALYSIS_PURPOSE)
        with gr.Column():
            gr.Markdown(constants.DIRECTIONS_MD)
            gr.Markdown(value=constants.RECORDING_ANALYSIS_DIRECTIONS)

    # Video upload next to the summary text.
    with gr.Row():
        videoBlock = gr.Video(label=constants.VIDEO_INPUT, source="upload")
        summaryBlock = gr.Textbox(label=constants.SUMMARY)

    # Topic boxes, filled with the HTML produced by RenderBoxes.
    with gr.Column():
        gr.Markdown(constants.TOPICS_MD)
        topics = gr.HTML()

    with gr.Row():
        submitBtn = gr.Button(value=constants.EXTRACT_SUMMARY, variant="primary")
        clearBtn = gr.Button(value=constants.CLEAR)

    # Log of the messages exchanged during analysis.
    with gr.Row():
        analysisDF = gr.DataFrame(
            type="pandas",
            value=pd.DataFrame({"role": [""], "content": [""] }),
            wrap=True,
            show_label=False,
            label=constants.OPENAI_LOG,
        )

    submitBtn.click(
        VideoIdentity,
        inputs=[videoBlock],
        outputs=[summaryBlock, topics, analysisDF],
    )
    # NOTE(review): videoBlock is not among these outputs, so Clear presumably
    # leaves the uploaded video in place - confirm that is intended.
    clearBtn.click(
        Unload,
        inputs=[],
        outputs=[topics, summaryBlock, analysisDF],
    )

    # NOTE(review): InitDF() rebinds the module-level name analysisDF to a
    # pandas DataFrame after the listeners above are registered - confirm.
    InitDF()

requirements.txt CHANGED Viewed

@@ -20,4 +20,5 @@ lxml==4.9.3
 PyPDF2==3.0.1
 tiktoken==0.4.0
 inflect==7.0.0
-diffusers==0.10.2

 PyPDF2==3.0.1
 tiktoken==0.4.0
 inflect==7.0.0
+diffusers==0.10.2
+moviepy==1.0.3

summarization/__pycache__/summarization.cpython-311.pyc CHANGED Viewed

Binary files a/summarization/__pycache__/summarization.cpython-311.pyc and b/summarization/__pycache__/summarization.cpython-311.pyc differ

summarization/__pycache__/ui_summarize.cpython-311.pyc ADDED Viewed

Binary file (6.01 kB). View file

summarization/summarization.py CHANGED Viewed

@@ -19,6 +19,6 @@ def SummarizeCompletion(code):
     summary_messages.append({"role": "user", "content": truncated_code})
     response_message = Completion(summary_messages)
-    summary_messages.append({"role": "system", "content": response_message})
     return summary_messages, truncated_prefix+response_message+"\n\nNum Characters: "+str(len(truncated_code))

     summary_messages.append({"role": "user", "content": truncated_code})
     response_message = Completion(summary_messages)
+    summary_messages.append({"role": "assistant", "content": response_message})
     return summary_messages, truncated_prefix+response_message+"\n\nNum Characters: "+str(len(truncated_code))

summarization/{summarize.py → ui_summarize.py} RENAMED Viewed

File without changes

utilities/__pycache__/clean_text.cpython-311.pyc CHANGED Viewed

Binary files a/utilities/__pycache__/clean_text.cpython-311.pyc and b/utilities/__pycache__/clean_text.cpython-311.pyc differ

utilities/__pycache__/constants.cpython-311.pyc CHANGED Viewed

Binary files a/utilities/__pycache__/constants.cpython-311.pyc and b/utilities/__pycache__/constants.cpython-311.pyc differ

utilities/clean_text.py CHANGED Viewed

@@ -17,4 +17,5 @@ def ReplaceNumbersWithWords(text):
             words[i] = constants.SYMBOL_TO_WORD[word]
     reply=' '.join(words)
     print('returning: '+reply)
-    return reply

             words[i] = constants.SYMBOL_TO_WORD[word]
     reply=' '.join(words)
     print('returning: '+reply)
+    return reply

utilities/constants.py CHANGED Viewed

@@ -76,6 +76,7 @@ QUESTIONS_INVESTIGATOR="How do I report a crime? What is the process for filing
 UI_1="Prompt Builder"
 UI_2="Summary Extraction"
 UI_3="Image Creation"
 PURPOSE_MD="### Purpose"
 DIRECTIONS_MD="### Directions"
@@ -112,6 +113,10 @@ IMAGE_SETTING="Image Setting"
 IMAGE_SETTING_INFO="Select an image setting."
 GENERATE_IMAGES="Generate Images"
 GENERATED_IMAGES="Generated Images"
 DEFAULT_LANGUAGE=LANGUAGE_1
 DEFAULT_PERSONA=PERSONA_HR_EXPERT="HR Expert"
 DEFAULT_VOICE=VOICE_1
@@ -130,4 +135,21 @@ SYMBOL_TO_WORD = {
     '%': "percent",
     '@': "at",
     '#': "pound"
-}

 UI_1="Prompt Builder"
 UI_2="Summary Extraction"
 UI_3="Image Creation"
+UI_4="Recording Analysis"
 PURPOSE_MD="### Purpose"
 DIRECTIONS_MD="### Directions"
 IMAGE_SETTING_INFO="Select an image setting."
 GENERATE_IMAGES="Generate Images"
 GENERATED_IMAGES="Generated Images"
+TOPICS_MD="### Topics"
+VIDEO_INPUT="Upload a Video(.mp4)"
+EXTRACT_SUMMARY="Extract Summary"
+RECORDING_ANALYSIS="Recording Analysis"
 DEFAULT_LANGUAGE=LANGUAGE_1
 DEFAULT_PERSONA=PERSONA_HR_EXPERT="HR Expert"
 DEFAULT_VOICE=VOICE_1
     '%': "percent",
     '@': "at",
     '#': "pound"
+}
# --- Recording Analysis tab: user-facing copy ---
# The button is labelled 'Extract Summary' (EXTRACT_SUMMARY), so the copy
# refers to it by that name.
RECORDING_ANALYSIS_PURPOSE = "Generative AI can help you quickly summarize and identify key concepts from videos. The 'Extract Summary' action chains 3 tasks together using preset prompts. The first task separates and translates the audio to text using OpenAI Whisper-1, the second summarizes the text using OpenAI Chat Completion and the third extracts key concepts from the video also using OpenAI Chat Completion."
RECORDING_ANALYSIS_DIRECTIONS = "To get started, upload a video (.mp4) that you'd like to summarize. Tap 'Extract Summary'. You'll see a Summary of the video in the 'Summary' section and a set of key concepts in the 'Topics' section. Prompt history will display in the 'OpenAI Communication Log' box, below."

# --- Recording Analysis: storage locations (relative paths, trailing slash) ---
TRANSCRIPT_PATH = "recording_analysis/transcripts/"
ORIGINALS_PATH = "recording_analysis/originals/"
ANALYSIS_PATH = "recording_analysis/data/"

# --- Recording Analysis: chat-completion settings ---
ANALYSIS_MODEL = "gpt-3.5-turbo"
SUMMARY_TOKENS = 1024  # max_tokens for the summary request
KEYWORD_TOKENS = 300  # max_tokens for the keyword request
NUM_RESULTS = 1
TEMP = 0
TOP_P = 0.2

# --- Recording Analysis: system prompts ---
ANALYSIS_SYSTEM_MESSAGE = "You will be provided a transcript, your task is to summarize the transcript as follows:-Overall summary of content,-If applicable, Action items (what needs to be done and who is doing it),-If applicable, a list of topics that are covered, -If applicable, a set of follow up items for areas where decisions are pending."
KEYWORD_SYSTEM_MESSAGE = "You will be provided with a block of text, and your task is to extract a list of up to 15 keywords from it, filter to keywords that appear more than once or are relevant to the central theme."