Kim Adams committed on
Commit ·
8a5a18b
1
Parent(s): 5cc4102
adding ocr
Browse files- app.py +4 -3
- chat_bot/__pycache__/ui_simple_chat.cpython-311.pyc +0 -0
- chat_bot/ui_simple_chat.py +5 -5
- craft_mlt_25k.pth +3 -0
- english_g2.pth +3 -0
- image_gen/__pycache__/ui_image_generation.cpython-311.pyc +0 -0
- image_to_text/__pycache__/image_to_text_translator.cpython-311.pyc +0 -0
- image_to_text/__pycache__/ui_image_to_text.cpython-311.pyc +0 -0
- image_to_text/image_to_text.py +132 -0
- image_to_text/image_to_text_translator.py +65 -0
- image_to_text/ui_image_to_text.py +53 -0
- recording_analysis/__pycache__/recording_analysis.cpython-311.pyc +0 -0
- recording_analysis/recording_analysis.py +13 -13
- requirements.txt +2 -1
- utilities/__pycache__/constants.cpython-311.pyc +0 -0
- utilities/constants.py +6 -4
app.py
CHANGED
|
@@ -9,7 +9,7 @@ from recording_analysis import ui_recording_analysis
|
|
| 9 |
from prompts import ui_prompt_builder
|
| 10 |
from chat_bot import ui_simple_chat
|
| 11 |
from slack_sentiment_analysis import ui_sentiment_analysis
|
| 12 |
-
|
| 13 |
|
| 14 |
openai.api_key = api_keys.APIKeys().get_key('OPENAI_API_KEY')
|
| 15 |
|
|
@@ -19,9 +19,10 @@ image_generator=ui_image_generation.ui
|
|
| 19 |
video_analysis=ui_recording_analysis.ui
|
| 20 |
slack_sentiment=ui_sentiment_analysis.ui
|
| 21 |
usaa_advisor=ui_simple_chat.ui
|
|
|
|
| 22 |
|
| 23 |
-
ui = gr.TabbedInterface([prompt_builder, summarize, image_generator, video_analysis,slack_sentiment, usaa_advisor],
|
| 24 |
-
(constants.UI_1, constants.UI_2, constants.UI_3,constants.UI_4, constants.UI_5,constants.UI_6),
|
| 25 |
theme=SoftBlue())
|
| 26 |
|
| 27 |
ui.launch()
|
|
|
|
| 9 |
from prompts import ui_prompt_builder
|
| 10 |
from chat_bot import ui_simple_chat
|
| 11 |
from slack_sentiment_analysis import ui_sentiment_analysis
|
| 12 |
+
from image_to_text import image_to_text_translator as ui_image_to_text
|
| 13 |
|
| 14 |
openai.api_key = api_keys.APIKeys().get_key('OPENAI_API_KEY')
|
| 15 |
|
|
|
|
| 19 |
video_analysis=ui_recording_analysis.ui
|
| 20 |
slack_sentiment=ui_sentiment_analysis.ui
|
| 21 |
usaa_advisor=ui_simple_chat.ui
|
| 22 |
+
image_to_text=ui_image_to_text.ui
|
| 23 |
|
| 24 |
+
ui = gr.TabbedInterface([prompt_builder, summarize, image_generator, video_analysis,slack_sentiment, image_to_text, usaa_advisor],
|
| 25 |
+
(constants.UI_1, constants.UI_2, constants.UI_3,constants.UI_4, constants.UI_5,constants.UI_6,constants.UI_7),
|
| 26 |
theme=SoftBlue())
|
| 27 |
|
| 28 |
ui.launch()
|
chat_bot/__pycache__/ui_simple_chat.cpython-311.pyc
CHANGED
|
Binary files a/chat_bot/__pycache__/ui_simple_chat.cpython-311.pyc and b/chat_bot/__pycache__/ui_simple_chat.cpython-311.pyc differ
|
|
|
chat_bot/ui_simple_chat.py
CHANGED
|
@@ -11,25 +11,25 @@ def InitDF():
|
|
| 11 |
commDF=pd.DataFrame({"role": [""], "content": [""] })
|
| 12 |
simple_chat.CreateEmbeddings(constants.PDF_INPUT_PATH, constants.TXT_OUTPUT_PATH)
|
| 13 |
|
| 14 |
-
def
|
| 15 |
bot_message,df=simple_chat.QueryEmbeddingsSimple(message)
|
| 16 |
chat_history.append((message, bot_message))
|
| 17 |
return "", chat_history,df
|
| 18 |
|
| 19 |
with gr.Blocks() as ui:
|
| 20 |
-
label = gr.Label(show_label=False, value=constants.
|
| 21 |
with gr.Row():
|
| 22 |
with gr.Column():
|
| 23 |
gr.Markdown(constants.PURPOSE_MD)
|
| 24 |
-
gr.Markdown(constants.
|
| 25 |
with gr.Column():
|
| 26 |
gr.Markdown(constants.DIRECTIONS_MD)
|
| 27 |
-
gr.Markdown(constants.
|
| 28 |
question= gr.Textbox (label=constants.QUESTIONS_PREFIX, value=constants.QUESTIONS_AR_EXPERT)
|
| 29 |
chatbot = gr.Chatbot(label=constants.CHAT_BOT, height=constants.CHAT_BOT_HEIGHT)
|
| 30 |
msg = gr.Textbox(label=constants.CHAT_BOT_INPUT)
|
| 31 |
commDF = gr.DataFrame(type="pandas", value=pd.DataFrame({"role": [""], "content": [""] }), wrap=True, label=constants.OPENAI_LOG)
|
| 32 |
clear = gr.ClearButton([msg, chatbot])
|
| 33 |
-
msg.submit(
|
| 34 |
|
| 35 |
InitDF()
|
|
|
|
| 11 |
commDF=pd.DataFrame({"role": [""], "content": [""] })
|
| 12 |
simple_chat.CreateEmbeddings(constants.PDF_INPUT_PATH, constants.TXT_OUTPUT_PATH)
|
| 13 |
|
| 14 |
+
def Respond(message, chat_history):
|
| 15 |
bot_message,df=simple_chat.QueryEmbeddingsSimple(message)
|
| 16 |
chat_history.append((message, bot_message))
|
| 17 |
return "", chat_history,df
|
| 18 |
|
| 19 |
with gr.Blocks() as ui:
|
| 20 |
+
label = gr.Label(show_label=False, value=constants.UI_7, container=False)
|
| 21 |
with gr.Row():
|
| 22 |
with gr.Column():
|
| 23 |
gr.Markdown(constants.PURPOSE_MD)
|
| 24 |
+
gr.Markdown(constants.AR_ADVISOR_PURPOSE)
|
| 25 |
with gr.Column():
|
| 26 |
gr.Markdown(constants.DIRECTIONS_MD)
|
| 27 |
+
gr.Markdown(constants.AR_ADVISOR_DIRECTIONS)
|
| 28 |
question= gr.Textbox (label=constants.QUESTIONS_PREFIX, value=constants.QUESTIONS_AR_EXPERT)
|
| 29 |
chatbot = gr.Chatbot(label=constants.CHAT_BOT, height=constants.CHAT_BOT_HEIGHT)
|
| 30 |
msg = gr.Textbox(label=constants.CHAT_BOT_INPUT)
|
| 31 |
commDF = gr.DataFrame(type="pandas", value=pd.DataFrame({"role": [""], "content": [""] }), wrap=True, label=constants.OPENAI_LOG)
|
| 32 |
clear = gr.ClearButton([msg, chatbot])
|
| 33 |
+
msg.submit(Respond, [msg, chatbot], [msg, chatbot,commDF])
|
| 34 |
|
| 35 |
InitDF()
|
craft_mlt_25k.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a5efbfb48b4081100544e75e1e2b57f8de3d84f213004b14b85fd4b3748db17
|
| 3 |
+
size 83152330
|
english_g2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e2272681d9d67a04e2dff396b6e95077bc19001f8f6d3593c307b9852e1c29e8
|
| 3 |
+
size 15143997
|
image_gen/__pycache__/ui_image_generation.cpython-311.pyc
CHANGED
|
Binary files a/image_gen/__pycache__/ui_image_generation.cpython-311.pyc and b/image_gen/__pycache__/ui_image_generation.cpython-311.pyc differ
|
|
|
image_to_text/__pycache__/image_to_text_translator.cpython-311.pyc
ADDED
|
Binary file (5.19 kB). View file
|
|
|
image_to_text/__pycache__/ui_image_to_text.cpython-311.pyc
ADDED
|
Binary file (5.52 kB). View file
|
|
|
image_to_text/image_to_text.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os, openai,string
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from pydub import AudioSegment
|
| 4 |
+
from utilities import constants, api_keys, clean_text, prompt_constants
|
| 5 |
+
from textwrap import wrap
|
| 6 |
+
from moviepy.editor import VideoFileClip
|
| 7 |
+
from nltk.tokenize import word_tokenize
|
| 8 |
+
|
| 9 |
+
openai.api_key = api_keys.APIKeys().get_key('OPENAI_API_KEY')
|
| 10 |
+
key_words=set()
|
| 11 |
+
|
| 12 |
+
def CompletionEngine(sys_message, user_message, num_tokens, num_results, temperature, topic_model, top_p):
    """Issue one chat-completion request made of a system and a user message.

    Args:
        sys_message: Content of the ``system`` message.
        user_message: Content of the ``user`` message.
        num_tokens: Passed as ``max_tokens``.
        num_results: Passed as ``n``.
        temperature: Passed as ``temperature``.
        topic_model: Passed as ``model``.
        top_p: Passed as ``top_p``.

    Returns:
        The object returned by ``openai.ChatCompletion.create``; callers in
        this file read ``choices[0]['message']['content']`` from it.
    """
    conversation = [
        {"role": "system", "content": sys_message},
        {"role": "user", "content": user_message},
    ]
    request = {
        "model": topic_model,
        "messages": conversation,
        "max_tokens": num_tokens,
        "n": num_results,
        "temperature": temperature,
        "stop": None,
        "top_p": top_p,
    }
    return openai.ChatCompletion.create(**request)
|
| 24 |
+
|
| 25 |
+
#------------- #1: strip audio from video, create text from audio using OpenAI whisper-1 ----------------
|
| 26 |
+
def StripAndTranslateAudio(input_file):
    """Extract the audio track of a video and transcribe it with OpenAI whisper-1.

    A transcript already saved under ``constants.TRANSCRIPT_PATH`` for the same
    cleaned file name is reused instead of transcribing again.

    Args:
        input_file: Path of the video file to process.

    Returns:
        A ``(transcript, messages)`` tuple. ``messages`` is a list of
        ``{"role": ..., "content": ...}`` dicts logging each step. When the
        file does not exist or has an unsupported extension, ``transcript``
        holds the corresponding message from ``constants`` and ``messages``
        is empty.
    """
    # Every exit returns exactly two values: ProcessAudio unpacks the result
    # as ``transcript, messages`` and would fail on a 3-tuple.
    if not os.path.exists(input_file):
        return f"{input_file} {constants.FILE_DOES_NOT_EXIST}", []
    # Check the extension (case-insensitively) before opening the file with
    # moviepy, so unsupported files are rejected without being decoded.
    if not input_file.lower().endswith((".mp4", ".mov", ".avi", ".mkv")):
        return constants.ANALYSIS_WRONG_FORMAT, []

    file_name = clean_text.CleanFileName(input_file)
    transcript_file = constants.TRANSCRIPT_PATH + file_name + ".txt"
    messages = []

    # See if already transcribed; if so, return the stored transcript.
    if os.path.exists(transcript_file):
        with open(transcript_file, "r") as f:
            transcript = f.read()
        messages.append({"role": "system", "content": f"Sending audio file {file_name} to OpenAI whisper-1"})
        messages.append({"role": "transcribe", "content": transcript})
        return transcript, messages

    audio_file = constants.ORIGINALS_PATH + "audio_" + file_name + '.wav'
    print(audio_file)

    # The clip is only needed to dump the audio track; close it right after
    # so its reader resources are released.
    video = VideoFileClip(input_file)
    try:
        video.audio.write_audiofile(audio_file)
    finally:
        video.close()

    chunk_texts = []
    try:
        audio_segment = AudioSegment.from_file(audio_file, format="wav")
        chunk_length = 60 * 1000  # 1 minute in milliseconds
        for i, start in enumerate(range(0, len(audio_segment), chunk_length)):
            chunk_audio_file = f"{constants.ORIGINALS_PATH}audio_chunk_{i}_{file_name}.wav"
            audio_segment[start:start + chunk_length].export(chunk_audio_file, format="wav")
            messages.append({"role": "system", "content": f"Sending audio chunk {i} to OpenAI whisper-1"})
            try:
                with open(chunk_audio_file, 'rb') as f:
                    chunk_transcript = openai.Audio.transcribe("whisper-1", f)
            finally:
                # Remove the temporary chunk even if the API call fails.
                os.remove(chunk_audio_file)
            chunk_texts.append(chunk_transcript['text'])
            messages.append({"role": "transcribe", "content": chunk_transcript['text']})
    finally:
        # Remove the temporary full-length audio file on success and failure.
        os.remove(audio_file)

    # Each chunk is followed by a single space, as in the original output.
    transcript = "".join(f"{text} " for text in chunk_texts)

    # Store the transcript once so the next run can reuse it.
    with open(transcript_file, "w") as f:
        f.write(transcript)
    return transcript, messages
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
#------------- 2: chunk & process transcripts using OpenAI gpt-3.5-turbo ----------------
|
| 69 |
+
def SummarizeLargeTranscript(transcript, messages):
    """Repeatedly chunk and summarize ``transcript`` until it no longer needs chunking.

    Args:
        transcript: Text to reduce.
        messages: Log list passed through to ``SummarizeChunks``.

    Returns:
        A ``(transcript, messages)`` tuple with the reduced text and the log.
    """
    while NeedsChunks(transcript):
        text_chunks = CreateChunks(transcript)
        summarized_text = SummarizeChunks(text_chunks, messages)
        # Stop if a pass did not shrink the text: repeating it would loop
        # forever and keep issuing completion requests.
        made_progress = len(summarized_text) < len(transcript)
        transcript = summarized_text
        if not made_progress:
            break
    return transcript, messages
|
| 75 |
+
|
| 76 |
+
def NeedsChunks(transcript):
|
| 77 |
+
return len(transcript) > constants.CHUNK_LENGTH
|
| 78 |
+
|
| 79 |
+
def CreateChunks(transcript):
|
| 80 |
+
# Calculate the total length of the string and segment length
|
| 81 |
+
total_length = len(transcript)
|
| 82 |
+
segment_length = constants.CHUNK_LENGTH
|
| 83 |
+
segment_indices = [i for i in range(segment_length - 1, total_length, segment_length)]
|
| 84 |
+
text_chunks = []
|
| 85 |
+
start_idx = 0
|
| 86 |
+
for end_idx in segment_indices:
|
| 87 |
+
# Adjust end index backward to find a space or punctuation mark
|
| 88 |
+
while end_idx > start_idx and transcript[end_idx] not in string.whitespace + string.punctuation:
|
| 89 |
+
end_idx -= 1
|
| 90 |
+
if end_idx > start_idx:
|
| 91 |
+
text_chunks.append(transcript[start_idx:end_idx])
|
| 92 |
+
start_idx = end_idx + 1 # Skip the space or punctuation
|
| 93 |
+
if start_idx < total_length:
|
| 94 |
+
text_chunks.append(transcript[start_idx:])
|
| 95 |
+
return text_chunks
|
| 96 |
+
|
| 97 |
+
def SummarizeChunks(text_chunks, messages):
    """Summarize each chunk in order and join the summaries with single spaces.

    Args:
        text_chunks: Iterable of text pieces.
        messages: Log list handed to ``SummarizeChunk`` for every piece.

    Returns:
        The space-joined summaries as one string.
    """
    return " ".join([SummarizeChunk(piece, messages) for piece in text_chunks])
|
| 103 |
+
|
| 104 |
+
def SummarizeChunk(chunk, messages):
    """Summarize a single text chunk through ``CompletionEngine``.

    The chunk is passed through ``clean_text.CleanText`` first. The summary is
    appended to ``messages`` as an ``assistant`` entry.

    Args:
        chunk: Text to summarize.
        messages: Log list, mutated in place.

    Returns:
        The summary text from the first completion choice.
    """
    cleaned = clean_text.CleanText(chunk)
    response = CompletionEngine(
        prompt_constants.ANALYSIS_SYSTEM_PROMPT,
        cleaned,
        constants.SUMMARY_TOKENS,
        constants.NUM_RESULTS,
        constants.TEMP,
        constants.ANALYSIS_MODEL,
        constants.TOP_P,
    )
    reply = response.choices[0]['message']['content']
    messages.append({"role": "assistant", "content": reply})
    return reply
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
#------------- #3: find topics using OpenAI gpt-3.5-turbo ----------------
|
| 113 |
+
def FindTopics(transcript, messages):
    """Ask the completion model for topics/keywords of ``transcript``.

    The keyword system prompt and the model's reply are both appended to
    ``messages`` (as ``system`` and ``assistant`` entries respectively).

    Args:
        transcript: Text to analyze.
        messages: Log list, mutated in place.

    Returns:
        A ``(topics, messages)`` tuple where ``topics`` is the content of the
        first completion choice.
    """
    system_prompt = prompt_constants.KEYWORD_SYSTEM_PROMPT
    messages.append({"role": "system", "content": system_prompt})
    response = CompletionEngine(
        system_prompt,
        transcript,
        constants.KEYWORD_TOKENS,
        constants.NUM_RESULTS,
        constants.TEMP,
        constants.ANALYSIS_MODEL,
        constants.TOP_P,
    )
    found = response.choices[0]['message']['content']
    messages.append({"role": "assistant", "content": found})
    return found, messages
|
| 119 |
+
|
| 120 |
+
def ProcessAudio(input_file):
    """Run the full pipeline: transcribe, summarize, then extract topics.

    Args:
        input_file: Path of the video file to process.

    Returns:
        A ``(transcript, topics, df)`` tuple: the (possibly summarized)
        transcript, the topics text, and a pandas DataFrame built from the
        accumulated message log.
    """
    # Step 1: strip audio from the video and transcribe it.
    raw_text, log = StripAndTranslateAudio(input_file)
    log.append({"role": "system", "content": prompt_constants.ANALYSIS_SYSTEM_PROMPT})

    # Step 2: reduce the transcript by chunked summarization.
    condensed, log = SummarizeLargeTranscript(raw_text, log)

    # Step 3: find topics in the reduced transcript.
    found_topics, log = FindTopics(condensed, log)

    return condensed, found_topics, pd.DataFrame(log)
|
image_to_text/image_to_text_translator.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import easyocr as ocr #OCR
|
| 2 |
+
from PIL import Image #Image Processing
|
| 3 |
+
import numpy as np #Image Processing
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import gradio as gr
|
| 6 |
+
from utilities import constants
|
| 7 |
+
|
| 8 |
+
reader = None
|
| 9 |
+
|
| 10 |
+
def LoadModel():
    """Return the shared easyocr reader, creating it on first use.

    The reader is cached in the module-level ``reader`` variable so later
    calls reuse the same instance. It is built for English with
    ``model_storage_directory='.'``.
    """
    global reader
    if reader is not None:
        return reader
    reader = ocr.Reader(['en'], model_storage_directory='.')
    return reader
|
| 15 |
+
|
| 16 |
+
def ProcessImage(image):
|
| 17 |
+
global reader
|
| 18 |
+
reader = LoadModel()
|
| 19 |
+
|
| 20 |
+
# Debugging lines to find out what 'image' is
|
| 21 |
+
print(f"Type of image: {type(image)}")
|
| 22 |
+
if isinstance(image, np.ndarray):
|
| 23 |
+
print(f"Shape of image array: {image.shape}")
|
| 24 |
+
print(f"Data type of image array: {image.dtype}")
|
| 25 |
+
|
| 26 |
+
if image is not None:
|
| 27 |
+
if isinstance(image, np.ndarray):
|
| 28 |
+
input_image = Image.fromarray(image)
|
| 29 |
+
else:
|
| 30 |
+
input_image = Image.open(image)
|
| 31 |
+
result = reader.readtext(np.array(input_image))
|
| 32 |
+
result_text = []
|
| 33 |
+
for text in result:
|
| 34 |
+
result_text.append(text[1])
|
| 35 |
+
return result_text
|
| 36 |
+
|
| 37 |
+
def ImageIdentity(image):
|
| 38 |
+
transcript = ProcessImage(image)
|
| 39 |
+
return transcript
|
| 40 |
+
|
| 41 |
+
def Clear():
    """Return reset values for the clear button's outputs.

    The clear button is wired to two outputs, ``[summaryBlock, analysisDF]``,
    so exactly two values are returned: an empty string for the text box and
    a one-row empty frame for the log table. Returning a third value shifted
    an empty string onto the DataFrame output.

    The module-level UI components are intentionally not rebound here;
    assigning ``None`` to ``imageBlock`` only discarded the component
    reference and did not clear anything on screen.

    Returns:
        A ``(summary_text, log_frame)`` tuple.
    """
    empty_log = pd.DataFrame({"role": [""], "content": [""]})
    return "", empty_log
|
| 45 |
+
|
| 46 |
+
with gr.Blocks() as ui:
|
| 47 |
+
label = gr.Label(show_label=False, value=constants.UI_6, container=False)
|
| 48 |
+
with gr.Row():
|
| 49 |
+
with gr.Column():
|
| 50 |
+
gr.Markdown(constants.PURPOSE_MD)
|
| 51 |
+
gr.Markdown(constants.AR_ADVISOR_PURPOSE)
|
| 52 |
+
with gr.Column():
|
| 53 |
+
gr.Markdown(constants.DIRECTIONS_MD)
|
| 54 |
+
gr.Markdown(constants.AR_ADVISOR_DIRECTIONS)
|
| 55 |
+
with gr.Row():
|
| 56 |
+
imageBlock = gr.Image(label=constants.IMAGE_INPUT, source="upload")
|
| 57 |
+
summaryBlock= gr.Textbox (label=constants.SUMMARY)
|
| 58 |
+
with gr.Row():
|
| 59 |
+
submitBtn=gr.Button(value=constants.EXTRACT_SUMMARY, variant="primary")
|
| 60 |
+
clearBtn=gr.Button(value=constants.CLEAR)
|
| 61 |
+
with gr.Row():
|
| 62 |
+
analysisDF=gr.DataFrame(type="pandas", value=pd.DataFrame({"role": [""], "content": [""] }),
|
| 63 |
+
wrap=True, show_label=False, label=constants.OPENAI_LOG)
|
| 64 |
+
submitBtn.click(ImageIdentity, inputs=[imageBlock], outputs=[summaryBlock])
|
| 65 |
+
clearBtn.click(Clear, inputs=[], outputs=[summaryBlock,analysisDF])
|
image_to_text/ui_image_to_text.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import re
|
| 4 |
+
from utilities import constants
|
| 5 |
+
from recording_analysis import recording_analysis
|
| 6 |
+
|
| 7 |
+
def InitDF():
|
| 8 |
+
global analysisDF
|
| 9 |
+
analysisDF=pd.DataFrame({"role": [""], "content": [""] })
|
| 10 |
+
|
| 11 |
+
def VideoIdentity(video):
|
| 12 |
+
transcript,topics,df = recording_analysis.ProcessAudio(video)
|
| 13 |
+
html=RenderBoxes(topics)
|
| 14 |
+
return transcript, html, df
|
| 15 |
+
|
| 16 |
+
def RenderBoxes(strings_list):
    """Render newline-separated topics as a row of inline styled HTML boxes.

    Blank lines are skipped and a leading list number such as ``"1. "`` is
    removed from each topic. Topic text is HTML-escaped before being placed
    in the markup, because it comes from model output and may contain
    characters such as ``<`` or ``&`` that would otherwise break the page.

    Args:
        strings_list: A single string with one topic per line.

    Returns:
        The concatenated ``<div>`` markup, or an empty string when there are
        no topics.
    """
    import html  # local import: not part of this module's import block

    if not strings_list:
        return ""

    # re.sub leaves lines without a leading number unchanged, so no separate
    # re.match guard is needed.
    clean_list = [
        re.sub(r'^\d+\.\s*', '', topic)
        for topic in strings_list.split('\n')
        if topic.strip()
    ]
    return "".join(
        f'<div style="background-color: #107276; color: #ffffff; display: inline-block; font-size: 13pt; margin: 5px; padding: 10px;">{html.escape(topic)}</div>'
        for topic in clean_list
    )
|
| 22 |
+
|
| 23 |
+
def ClearScreen():
    """Return reset values for the clear button's three outputs.

    The values line up with the click wiring ``[topics, summaryBlock,
    analysisDF]``: two empty strings and a one-row empty log frame.

    The module-level components are not rebound here; assigning ``None`` to
    ``videoBlock`` through ``global`` only replaced the component reference
    in the module namespace and had no effect on the rendered UI.

    Returns:
        A ``(topics_html, summary_text, log_frame)`` tuple.
    """
    empty_log = pd.DataFrame({"role": [""], "content": [""]})
    return "", "", empty_log
|
| 27 |
+
|
| 28 |
+
with gr.Blocks() as ui:
|
| 29 |
+
label2 = gr.Label(show_label=False, value=constants.RECORDING_ANALYSIS, container=False)
|
| 30 |
+
with gr.Row():
|
| 31 |
+
with gr.Column():
|
| 32 |
+
gr.Markdown(constants.PURPOSE_MD)
|
| 33 |
+
gr.Markdown(constants.RECORDING_ANALYSIS_PURPOSE)
|
| 34 |
+
with gr.Column():
|
| 35 |
+
gr.Markdown(constants.DIRECTIONS_MD)
|
| 36 |
+
gr.Markdown(value=constants.RECORDING_ANALYSIS_DIRECTIONS)
|
| 37 |
+
|
| 38 |
+
with gr.Row():
|
| 39 |
+
videoBlock = gr.Video(label=constants.VIDEO_INPUT, source="upload")
|
| 40 |
+
summaryBlock= gr.Textbox (label=constants.SUMMARY)
|
| 41 |
+
with gr.Column():
|
| 42 |
+
gr.Markdown(constants.TOPICS_MD)
|
| 43 |
+
topics= gr.HTML ()
|
| 44 |
+
with gr.Row():
|
| 45 |
+
submitBtn=gr.Button(value=constants.EXTRACT_SUMMARY, variant="primary")
|
| 46 |
+
clearBtn=gr.Button(value=constants.CLEAR)
|
| 47 |
+
with gr.Row():
|
| 48 |
+
analysisDF=gr.DataFrame(type="pandas", value=pd.DataFrame({"role": [""], "content": [""] }),
|
| 49 |
+
wrap=True, show_label=False, label=constants.OPENAI_LOG)
|
| 50 |
+
submitBtn.click(VideoIdentity, inputs=[videoBlock], outputs=[summaryBlock,topics,analysisDF])
|
| 51 |
+
clearBtn.click(ClearScreen, inputs=[], outputs=[topics,summaryBlock,analysisDF])
|
| 52 |
+
|
| 53 |
+
InitDF()
|
recording_analysis/__pycache__/recording_analysis.cpython-311.pyc
CHANGED
|
Binary files a/recording_analysis/__pycache__/recording_analysis.cpython-311.pyc and b/recording_analysis/__pycache__/recording_analysis.cpython-311.pyc differ
|
|
|
recording_analysis/recording_analysis.py
CHANGED
|
@@ -22,14 +22,7 @@ def CompletionEngine(sys_message, user_message, num_tokens, num_results, tempera
|
|
| 22 |
top_p=top_p
|
| 23 |
)
|
| 24 |
|
| 25 |
-
|
| 26 |
-
chunk = clean_text.CleanText(chunk)
|
| 27 |
-
completion = CompletionEngine(prompt_constants.ANALYSIS_SYSTEM_PROMPT, chunk, constants.SUMMARY_TOKENS, constants.NUM_RESULTS, constants.TEMP, constants.ANALYSIS_MODEL, constants.TOP_P)
|
| 28 |
-
summary = completion.choices[0]['message']['content']
|
| 29 |
-
messages.append({"role": "assistant", "content": summary})
|
| 30 |
-
return summary
|
| 31 |
-
|
| 32 |
-
#1: strip audio from video, create text from audio using OpenAI whisper-1
|
| 33 |
def StripAndTranslateAudio(input_file):
|
| 34 |
if not os.path.exists(input_file):
|
| 35 |
return f"{input_file} {constants.FILE_DOES_NOT_EXIST}",[],pd.DataFrame({"role": [""], "content": [""] })
|
|
@@ -71,14 +64,13 @@ def StripAndTranslateAudio(input_file):
|
|
| 71 |
f.write(transcript)
|
| 72 |
return transcript, messages
|
| 73 |
|
| 74 |
-
|
|
|
|
| 75 |
def SummarizeLargeTranscript(transcript, messages):
|
| 76 |
while(NeedsChunks(transcript)):
|
| 77 |
text_chunks=CreateChunks(transcript)
|
| 78 |
summarized_text=SummarizeChunks(text_chunks,messages)
|
| 79 |
-
transcript=summarized_text
|
| 80 |
-
|
| 81 |
-
print("transcript: "+transcript)
|
| 82 |
return transcript, messages
|
| 83 |
|
| 84 |
def NeedsChunks(transcript):
|
|
@@ -109,7 +101,15 @@ def SummarizeChunks(text_chunks, messages):
|
|
| 109 |
summarized_text_list.append(chunk_summary)
|
| 110 |
return " ".join(summarized_text_list)
|
| 111 |
|
| 112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
def FindTopics(transcript, messages):
|
| 114 |
messages.append({"role": "system", "content": prompt_constants.KEYWORD_SYSTEM_PROMPT})
|
| 115 |
topicCompletion=CompletionEngine(prompt_constants.KEYWORD_SYSTEM_PROMPT, transcript, constants.KEYWORD_TOKENS, constants.NUM_RESULTS, constants.TEMP, constants.ANALYSIS_MODEL, constants.TOP_P)
|
|
|
|
| 22 |
top_p=top_p
|
| 23 |
)
|
| 24 |
|
| 25 |
+
#------------- #1: strip audio from video, create text from audio using OpenAI whisper-1 ----------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
def StripAndTranslateAudio(input_file):
|
| 27 |
if not os.path.exists(input_file):
|
| 28 |
return f"{input_file} {constants.FILE_DOES_NOT_EXIST}",[],pd.DataFrame({"role": [""], "content": [""] })
|
|
|
|
| 64 |
f.write(transcript)
|
| 65 |
return transcript, messages
|
| 66 |
|
| 67 |
+
|
| 68 |
+
#------------- 2: chunk & process transcripts using OpenAI gpt-3.5-turbo ----------------
|
| 69 |
def SummarizeLargeTranscript(transcript, messages):
|
| 70 |
while(NeedsChunks(transcript)):
|
| 71 |
text_chunks=CreateChunks(transcript)
|
| 72 |
summarized_text=SummarizeChunks(text_chunks,messages)
|
| 73 |
+
transcript=summarized_text
|
|
|
|
|
|
|
| 74 |
return transcript, messages
|
| 75 |
|
| 76 |
def NeedsChunks(transcript):
|
|
|
|
| 101 |
summarized_text_list.append(chunk_summary)
|
| 102 |
return " ".join(summarized_text_list)
|
| 103 |
|
| 104 |
+
def SummarizeChunk(chunk, messages):
|
| 105 |
+
chunk = clean_text.CleanText(chunk)
|
| 106 |
+
completion = CompletionEngine(prompt_constants.ANALYSIS_SYSTEM_PROMPT, chunk, constants.SUMMARY_TOKENS, constants.NUM_RESULTS, constants.TEMP, constants.ANALYSIS_MODEL, constants.TOP_P)
|
| 107 |
+
summary = completion.choices[0]['message']['content']
|
| 108 |
+
messages.append({"role": "assistant", "content": summary})
|
| 109 |
+
return summary
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
#------------- #3: find topics using OpenAI gpt-3.5-turbo ----------------
|
| 113 |
def FindTopics(transcript, messages):
|
| 114 |
messages.append({"role": "system", "content": prompt_constants.KEYWORD_SYSTEM_PROMPT})
|
| 115 |
topicCompletion=CompletionEngine(prompt_constants.KEYWORD_SYSTEM_PROMPT, transcript, constants.KEYWORD_TOKENS, constants.NUM_RESULTS, constants.TEMP, constants.ANALYSIS_MODEL, constants.TOP_P)
|
requirements.txt
CHANGED
|
@@ -22,4 +22,5 @@ inflect==7.0.0
|
|
| 22 |
diffusers==0.10.2
|
| 23 |
moviepy==1.0.3
|
| 24 |
pdfplumber==0.10.2
|
| 25 |
-
pytz==2023.3
|
|
|
|
|
|
| 22 |
diffusers==0.10.2
|
| 23 |
moviepy==1.0.3
|
| 24 |
pdfplumber==0.10.2
|
| 25 |
+
pytz==2023.3
|
| 26 |
+
easyocr==1.7.1
|
utilities/__pycache__/constants.cpython-311.pyc
CHANGED
|
Binary files a/utilities/__pycache__/constants.cpython-311.pyc and b/utilities/__pycache__/constants.cpython-311.pyc differ
|
|
|
utilities/constants.py
CHANGED
|
@@ -13,10 +13,10 @@ RECORDING_ANALYSIS_DIRECTIONS="To get started, upload a video (.mp4) that you'd
|
|
| 13 |
SLACK_SENTIMENT_ANALYSIS_PURPOSE="Generative AI can help you understand the sentiment of your Slack messages. The 'Analyze Sentiment' action parses Slack content and uses OpenAI Chat Completion to score the sentiment of each message. The 'Sentiment' scatterplot shows the distribution of sentiment over the dataset."
|
| 14 |
SLACK_SENTIMENT_ANALYSIS_DIRECTIONS="To get started select one of the slack data options in 'Slack Examples' below, you'll see the Slack content in the 'Original' textbox. Next tap 'Analyze Sentiment'. ChatCompletion calls will provide sentiment scores for each message. The results will be displayed in the 'Dataframe' section. Finally, the distribution of sentiment over the dataset is shown in the 'Sentiment' scatterplot. Prompt history will display in the 'OpenAI Communication Log' box, below."
|
| 15 |
|
| 16 |
-
|
| 17 |
-
|
| 18 |
|
| 19 |
-
CHAT_BOT="
|
| 20 |
CHAT_BOT_INPUT="Ask a Question"
|
| 21 |
CHAT_BOT_HEIGHT=800
|
| 22 |
|
|
@@ -106,7 +106,8 @@ UI_2="Summary Extraction"
|
|
| 106 |
UI_3="Image Creation"
|
| 107 |
UI_4="Recording Analysis"
|
| 108 |
UI_5="Slack Sentiment Analysis"
|
| 109 |
-
UI_6="
|
|
|
|
| 110 |
|
| 111 |
HR_BENEFIT_PKL = 'utilities/data/embeddings/hr_asst_embeddings.pkl'
|
| 112 |
HR_BENEFIT_CSV = 'utilities/data/embeddings/hr_asst_data.csv'
|
|
@@ -154,6 +155,7 @@ GENERATE_IMAGES="Generate Images"
|
|
| 154 |
GENERATED_IMAGES="Generated Images"
|
| 155 |
|
| 156 |
TOPICS_MD="### Topics"
|
|
|
|
| 157 |
VIDEO_INPUT="Upload a Video(.mp4)"
|
| 158 |
EXTRACT_SUMMARY="Extract Summary"
|
| 159 |
RECORDING_ANALYSIS="Recording Analysis"
|
|
|
|
| 13 |
SLACK_SENTIMENT_ANALYSIS_PURPOSE="Generative AI can help you understand the sentiment of your Slack messages. The 'Analyze Sentiment' action parses Slack content and uses OpenAI Chat Completion to score the sentiment of each message. The 'Sentiment' scatterplot shows the distribution of sentiment over the dataset."
|
| 14 |
SLACK_SENTIMENT_ANALYSIS_DIRECTIONS="To get started select one of the slack data options in 'Slack Examples' below, you'll see the Slack content in the 'Original' textbox. Next tap 'Analyze Sentiment'. ChatCompletion calls will provide sentiment scores for each message. The results will be displayed in the 'Dataframe' section. Finally, the distribution of sentiment over the dataset is shown in the 'Sentiment' scatterplot. Prompt history will display in the 'OpenAI Communication Log' box, below."
|
| 15 |
|
| 16 |
+
# Purpose text rendered as Markdown in the advisor chatbot UI.
AR_ADVISOR_PURPOSE = "This component demonstrates how a chatbot can leverage local embeddings to provide context. In this example, embeddings were created from USAA's 2022 Member Report to provide organizational context for all interactions. This shows how generative AI enables any dataset to be queried and explored via natural language."
|
| 17 |
+
# Directions text rendered as Markdown in the advisor chatbot UI.
AR_ADVISOR_DIRECTIONS = "To get started, simply start asking questions. This bot has been trained on USAA's 2022 Member Report and can answer questions about any content from the report. You can ask questions like: 'What's the best thing that happened last year?' or 'What was the biggest obstacle faced in 2022?'"
|
| 18 |
|
| 19 |
+
CHAT_BOT="Conversation Log"
|
| 20 |
CHAT_BOT_INPUT="Ask a Question"
|
| 21 |
CHAT_BOT_HEIGHT=800
|
| 22 |
|
|
|
|
| 106 |
UI_3="Image Creation"
|
| 107 |
UI_4="Recording Analysis"
|
| 108 |
UI_5="Slack Sentiment Analysis"
|
| 109 |
+
UI_6="Image to Text"
|
| 110 |
+
UI_7="PDF Query Chatbot"
|
| 111 |
|
| 112 |
HR_BENEFIT_PKL = 'utilities/data/embeddings/hr_asst_embeddings.pkl'
|
| 113 |
HR_BENEFIT_CSV = 'utilities/data/embeddings/hr_asst_data.csv'
|
|
|
|
| 155 |
GENERATED_IMAGES="Generated Images"
|
| 156 |
|
| 157 |
TOPICS_MD="### Topics"
|
| 158 |
+
IMAGE_INPUT="Upload an Image(.png, .jpg, .jpeg)"
|
| 159 |
VIDEO_INPUT="Upload a Video(.mp4)"
|
| 160 |
EXTRACT_SUMMARY="Extract Summary"
|
| 161 |
RECORDING_ANALYSIS="Recording Analysis"
|