Kim Adams committed on
Commit
26ead64
·
1 Parent(s): 6f0fdbf

added NLP summary component

Browse files
app.py CHANGED
@@ -10,6 +10,7 @@ from prompts import ui_prompt_builder
10
  from chat_bot import ui_simple_chat
11
  from slack_sentiment_analysis import ui_sentiment_analysis
12
  from image_to_text import image_to_text_translator as ui_image_to_text
 
13
 
14
  openai.api_key = api_keys.APIKeys().get_key('OPENAI_API_KEY')
15
 
@@ -20,9 +21,10 @@ video_analysis=ui_recording_analysis.ui
20
  slack_sentiment=ui_sentiment_analysis.ui
21
  usaa_advisor=ui_simple_chat.ui
22
  image_to_text=ui_image_to_text.ui
 
23
 
24
- ui = gr.TabbedInterface([prompt_builder, summarize, image_generator, video_analysis,slack_sentiment, usaa_advisor],
25
- (constants.UI_1, constants.UI_2, constants.UI_3,constants.UI_4, constants.UI_5,constants.UI_7),
26
  theme=SoftBlue())
27
 
28
- ui.launch()
 
10
  from chat_bot import ui_simple_chat
11
  from slack_sentiment_analysis import ui_sentiment_analysis
12
  from image_to_text import image_to_text_translator as ui_image_to_text
13
+ from text_analysis import ui_text_analysis
14
 
15
  openai.api_key = api_keys.APIKeys().get_key('OPENAI_API_KEY')
16
 
 
21
  slack_sentiment=ui_sentiment_analysis.ui
22
  usaa_advisor=ui_simple_chat.ui
23
  image_to_text=ui_image_to_text.ui
24
+ text_analysis=ui_text_analysis.ui
25
 
26
+ ui = gr.TabbedInterface([prompt_builder, text_analysis, image_generator, video_analysis,slack_sentiment, usaa_advisor, summarize],
27
+ (constants.UI_1, constants.UI_2, constants.UI_3,constants.UI_4, constants.UI_5,constants.UI_7, constants.UI_8),
28
  theme=SoftBlue())
29
 
30
+ ui.launch()
craft_mlt_25k.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a5efbfb48b4081100544e75e1e2b57f8de3d84f213004b14b85fd4b3748db17
3
- size 83152330
 
 
 
 
english_g2.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2272681d9d67a04e2dff396b6e95077bc19001f8f6d3593c307b9852e1c29e8
3
- size 15143997
 
 
 
 
image_to_text/__pycache__/image_to_text_translator.cpython-311.pyc CHANGED
Binary files a/image_to_text/__pycache__/image_to_text_translator.cpython-311.pyc and b/image_to_text/__pycache__/image_to_text_translator.cpython-311.pyc differ
 
recording_analysis/__pycache__/ui_recording_analysis.cpython-311.pyc CHANGED
Binary files a/recording_analysis/__pycache__/ui_recording_analysis.cpython-311.pyc and b/recording_analysis/__pycache__/ui_recording_analysis.cpython-311.pyc differ
 
requirements.txt CHANGED
@@ -24,4 +24,5 @@ moviepy==1.0.3
24
  pdfplumber==0.10.2
25
  pytz==2023.3
26
  easyocr==1.7.1
27
- ffmpeg==1.4
 
 
24
  pdfplumber==0.10.2
25
  pytz==2023.3
26
  easyocr==1.7.1
27
+ ffmpeg==1.4
28
+ spacy==3.7.2
setup.sh ADDED
@@ -0,0 +1 @@
 
 
1
+ python -m spacy download en_core_web_sm
text_analysis/__pycache__/ui_text_analysis.cpython-311.pyc ADDED
Binary file (5.75 kB). View file
 
text_analysis/text_analysis.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai, json, re, random
2
+ import pandas as pd
3
+ from utilities import date_format, prompt_constants
4
+
5
def Completion(slack_message):
    """Send one Slack message to the chat model and return the reply text.

    The request carries the sentiment system prompt followed by
    ``slack_message`` as the user turn. The content of the first returned
    choice is echoed to stdout and returned to the caller.
    """
    chat_turns = [
        {"role": "system", "content": prompt_constants.SLACK_SENTIMENT_SYSTEM_PROMPT},
        {"role": "user", "content": slack_message},
    ]
    response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=chat_turns)
    print("response")
    reply_text = response["choices"][0]["message"]["content"]
    print(reply_text)
    return reply_text
14
+
15
def sanitize_blob(blob_str):
    """Repair single-quote key/value delimiters so a blob can be parsed as JSON.

    A blob that already parses as JSON is returned untouched. Rewriting it
    would corrupt string values that merely contain an apostrophe next to a
    colon (for example ``"he said: 'hi'"``).

    Otherwise two substitutions are applied, in this order:
      1. a ``'`` immediately followed by ``:`` becomes ``"``
      2. a ``'`` immediately preceded by ``: `` becomes ``"``

    Args:
        blob_str: Raw text expected to hold a JSON document.

    Returns:
        The original text if it is valid JSON, else the text with the two
        substitutions applied. The result is not guaranteed to be valid JSON.
    """
    try:
        json.loads(blob_str)
    except (TypeError, ValueError):
        # Not parseable as-is (json.JSONDecodeError is a ValueError): fall
        # through to the quote repair below.
        pass
    else:
        return blob_str

    closed_keys = re.sub(r"'(?=:)", '"', blob_str)
    return re.sub(r"(?<=: )'", '"', closed_keys)
17
+
18
def FindScore(response):
    """Pull the first 0-to-1 score out of ``response`` and add a small random offset.

    The text is searched for the first token shaped like ``0``, ``0.<digits>``,
    ``1`` or ``1.0...``. When one is found it is rounded to two decimals and a
    random offset (uniform in 0.01 to 0.099, rounded to three decimals) is
    added. When nothing matches, 0 is returned with no offset.

    The offset is drawn on every call, including calls that find no score.
    """
    found = re.search(r"\b(0(\.\d+)?|1(\.0+)?)\b", response)
    jitter = round(random.uniform(0.01, .099), 3)
    if not found:
        return 0
    base_score = round(float(found.group(1)), 2)
    return base_score + jitter
26
+
27
def CheckType(response):
    """Turn a model reply into a numeric sentiment score.

    Args:
        response: Either a number (used directly, rounded to two decimals)
            or a string (scanned for a score by ``FindScore``).

    Returns:
        A numeric score. Any other input type yields 0. Previously such
        inputs (including plain ints) fell through and returned ``None``,
        which cannot be compared against the numeric thresholds callers use.
    """
    if isinstance(response, (int, float)):
        return round(response, 2)
    if isinstance(response, str):
        return FindScore(response)
    return 0
32
+
33
def ProcessMessage(message, summary_messages, slack_messages, id, parent_user=None):
    """Score one Slack message (and, recursively, its replies) and record the result.

    Args:
        message: Mapping with ``"user"``, ``"text"`` and ``"timestamp"`` keys and
            an optional ``"replies"`` list of mappings of the same form.
        summary_messages: List that receives the user turn and the model's
            reply for every processed message.
        slack_messages: List that receives one result record per processed
            message. Records are appended in processing order.
        id: Requested id for this message. It is raised if needed so that it
            is always greater than the id of the last record already in
            ``slack_messages``.
        parent_user: User of the message this one replies to, or ``None`` for
            a top-level message.

    Returns:
        None. Results are delivered by appending to the two lists.
    """
    # A thread with replies uses several ids here, while callers advance their
    # own counter by one per top-level message. Without this bump the next
    # message would reuse an id already given to a reply.
    if slack_messages:
        id = max(id, slack_messages[-1]["id"] + 1)

    user = message["user"]
    message_text = message["text"]
    timestamp = message["timestamp"]

    response = Completion(message_text)
    summary_messages.append({"role": "user", "content": message_text})
    summary_messages.append({"role": "assistant", "content": response})

    sentiment_score = CheckType(response)

    # 0 means no score was found; scores from 0.3 to 0.6 inclusive stay "Neutral".
    sentiment = "Neutral"
    if sentiment_score == 0:
        sentiment = "Undefined"
    elif 0 < sentiment_score < 0.3:
        sentiment = "Negative"
    elif sentiment_score > 0.6:
        sentiment = "Positive"

    dateX, _, twentyfour_time = date_format.TimeStampToDateAndTime(timestamp)

    message_obj = {
        "id": id,
        "user": user,
        "message": f"{message_text}",
        "date": dateX + ": " + twentyfour_time,
        "time": timestamp,
        "twentyfour_time": twentyfour_time,
        "sentiment_score": sentiment_score,
        "sentiment": sentiment,
        "size": 8,
        "parent_user": parent_user,
    }
    slack_messages.append(message_obj)

    # Process nested replies, attributing each one to this message's user.
    if "replies" in message:
        next_id = id + 1
        for reply in message["replies"]:
            ProcessMessage(reply, summary_messages, slack_messages, next_id, parent_user=user)
            next_id += 1
72
+
73
def ParseBlobs(blob, summary_messages):
    """Parse a Slack export blob and score every message in it.

    The blob is passed through ``sanitize_blob`` and decoded as JSON. Each
    entry under its ``"messages"`` key is handed to ``ProcessMessage``, and
    the module-level ``id`` counter is advanced by one per entry.

    Returns:
        ``(json_text, summary_messages)``, where ``json_text`` is the
        JSON-encoded list of per-message records, or ``None`` when the blob is
        not valid JSON (a notice is printed in that case).
    """
    global id
    cleaned = sanitize_blob(blob)
    try:
        payload = json.loads(cleaned)
    except json.JSONDecodeError:
        print("Invalid JSON format.")
        return None

    # The conversation log starts with the system prompt used for every call.
    summary_messages.append(
        {"role": "system", "content": prompt_constants.SLACK_SENTIMENT_SYSTEM_PROMPT}
    )
    collected = []
    for entry in payload["messages"]:
        ProcessMessage(entry, summary_messages, collected, id)
        id += 1

    return json.dumps(collected, ensure_ascii=False), summary_messages
89
+
90
def AnalyzeSentiment(blob):
    """Run sentiment scoring over a Slack export blob and return the results as DataFrames.

    Resets the module-level ``id`` counter to 3 before parsing.

    Args:
        blob: Text of the Slack export, expected to be JSON (see ``ParseBlobs``).

    Returns:
        A 3-tuple ``(df, sentimentDF, n)``:
          * ``df``: DataFrame of the prompt/response log.
          * ``sentimentDF``: DataFrame of the per-message records, empty when
            the blob could not be parsed.
          * ``n``: the module-level counter plus 3.
            NOTE(review): the meaning of the ``+3`` offset is not visible
            here; confirm against the caller.
    """
    global id
    # Local import: pandas deprecates passing literal JSON text to read_json,
    # so the text is wrapped in a file-like object below.
    from io import StringIO

    summary_messages = []
    id = 3
    parsed = ParseBlobs(blob, summary_messages)
    if parsed is None:
        # ParseBlobs has already reported the invalid JSON. Unpacking None
        # would raise TypeError, so hand back empty frames instead.
        return pd.DataFrame(summary_messages), pd.DataFrame(), id + 3

    slack_blobs, summary_messages = parsed
    df = pd.DataFrame(summary_messages)
    sentimentDF = pd.read_json(StringIO(slack_blobs))
    return df, sentimentDF, id + 3
text_analysis/ui_text_analysis.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gradio as gr
3
+ import os
4
+ import base64
5
+ import spacy
6
+ from utilities import constants
7
+ from spacy import displacy
8
# Load the small English pipeline. The model package is fetched only when it
# is missing, so importing this module does not shell out to the installer
# (and hit the network) on every start.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # spaCy raises OSError when the requested model package cannot be found.
    import importlib

    os.system('python -m spacy download en_core_web_sm')
    # The package was installed after this process started; drop stale
    # import-finder caches before retrying.
    importlib.invalidate_caches()
    nlp = spacy.load("en_core_web_sm")
10
+
11
+ input_examples=[
12
+ "Every day may not be good, but there's something good in every day.",
13
+ "The best preparation for tomorrow is doing your best today.",
14
+ "Believe you can, and you're halfway there."]
15
+
16
def AnalyzeText(text):
    """Run the spaCy pipeline over ``text`` and build the three UI outputs.

    Returns:
        A 3-tuple ``(pos_tokens, pos_count, nlp_html)``:
          * ``pos_tokens``: list of ``(token text, part-of-speech tag)`` pairs,
            each followed by a ``(" ", None)`` spacer pair.
          * ``pos_count``: dict with the character count of ``text`` and the
            number of tokens in the parsed document.
          * ``nlp_html``: HTML snippet embedding the displaCy dependency
            rendering as a base64 ``data:`` image.
    """
    doc = nlp(text)

    # Render the dependency parse and inline it as a base64 data URI.
    dependency_svg = displacy.render(doc, style='dep')
    svg_bytes = dependency_svg.encode('utf-8')
    svg_base64_encoded = base64.b64encode(svg_bytes).decode('utf-8')
    nlp_html = f"""
<div style='display: flex; justify-content: center; align-items: center; width: 100%; overflow-x: auto;'>
<img src="data:image/svg+xml;base64,{svg_base64_encoded}"
style='display: block; max-width: 100%; height: auto; margin: auto;'/>
</div>
"""

    pos_count = {"char_count": len(text), "token_count": len(doc)}

    pos_tokens = []
    for token in doc:
        pos_tokens.append((token.text, token.pos_))
        pos_tokens.append((" ", None))

    return pos_tokens, pos_count, nlp_html
34
+
35
def Clear(clearBtn):
    """Reset the input box and the three result panes.

    The returned values are applied, in order, to the input textbox, the
    highlighted-token view, the JSON counts view and the HTML view.

    The textbox is reset to an empty string so that its placeholder shows
    again. Writing the placeholder text in as the value would turn it into
    real input that the next submit analyses. The HTML view likewise gets an
    empty string rather than a list.

    Args:
        clearBtn: Value of the button wired as this handler's input; unused.

    Returns:
        ``("", [], {}, "")``
    """
    return ("", [], {}, "")
37
+
38
+ with gr.Blocks() as ui:
39
+ label = gr.Label(show_label=False, value=constants.TEXT_ANALYSIS, container=False)
40
+ with gr.Column():
41
+ with gr.Row():
42
+ with gr.Column():
43
+ gr.Markdown(constants.PURPOSE_MD)
44
+ gr.Markdown(constants.NLP_ANALYSIS_PURPOSE)
45
+ with gr.Column():
46
+ gr.Markdown(constants.DIRECTIONS_MD)
47
+ gr.Markdown(value=constants.NLP_ANALYSIS_DIRECTIONS)
48
+ with gr.Row():
49
+ with gr.Column():
50
+ inputString=gr.Textbox(placeholder=constants.NLP_PROMPT,
51
+ label="Input Text", lines=3, height="500",
52
+ interactive=True)
53
+ with gr.Row():
54
+ clearBtn=gr.Button(constants.CLEAR, variant="secondary")
55
+ submitBtn=gr.Button(constants.SUBMIT, variant="primary")
56
+ with gr.Column():
57
+ posTags=gr.HighlightedText(label=constants.TOKENS)
58
+ gr.Markdown(constants.NLP_ANALYSIS_MD)
59
+ posCount=gr.JSON()
60
+
61
+ inputExampleSelect = gr.Examples(input_examples,inputs=[inputString],label="Or select an example." )
62
+ gr.Markdown(constants.NLP_POS_MAP_MD)
63
+ posTokens=gr.HTML()
64
+ submitBtn.click(AnalyzeText, inputs=[inputString], outputs=[posTags,posCount,posTokens])
65
+ clearBtn.click(Clear, inputs=[clearBtn], outputs=[inputString,posTags,posCount,posTokens])
ui/__pycache__/app_theme.cpython-311.pyc CHANGED
Binary files a/ui/__pycache__/app_theme.cpython-311.pyc and b/ui/__pycache__/app_theme.cpython-311.pyc differ
 
utilities/__pycache__/constants.cpython-311.pyc CHANGED
Binary files a/utilities/__pycache__/constants.cpython-311.pyc and b/utilities/__pycache__/constants.cpython-311.pyc differ
 
utilities/__pycache__/prompt_constants.cpython-311.pyc CHANGED
Binary files a/utilities/__pycache__/prompt_constants.cpython-311.pyc and b/utilities/__pycache__/prompt_constants.cpython-311.pyc differ
 
utilities/constants.py CHANGED
@@ -1,3 +1,8 @@
 
 
 
 
 
1
  PROMPT_DESCRIPTION="Explore how generative AI can be used to create content. Personas let you explore the variety of roles the system can take on. Translation options specify the language for your response. Voice options are an assortment of AI voices that will bring messages to life. Check the Sentiment box to see how the system identifies sentiment from conversation. Check the emotion to see how the sytem detects your emotion."
2
  PROMPT_DIRECTIONS="To get started, select a Persona, Language, and Voice. If you'd like Sentiment or Emotion analysis, check the respective box. Tap the 'Record from microphone' button and say what's on your mind, tap 'Stop recording' when your done. If you're happy with what you've said, tap 'Submit' to send your request, otherwise tap 'Clear' to record a new message."
3
 
@@ -13,6 +18,9 @@ RECORDING_ANALYSIS_DIRECTIONS="To get started, upload a video (.mp4) that you'd
13
  SLACK_SENTIMENT_ANALYSIS_PURPOSE="Generative AI can help you understand the sentiment of your Slack messages. The 'Analyze Sentiment' action parses Slack content and uses OpenAI Chat Completion to score the sentiment of each message. The 'Sentiment' scatterplot shows the distribution of sentiment over the dataset."
14
  SLACK_SENTIMENT_ANALYSIS_DIRECTIONS="To get started select one of the slack data options in 'Slack Examples' below, you'll see the Slack content in the 'Original' textbox. Next tap 'Analyze Sentiment'. ChatCompletion calls will provide sentiment scores for each message. The results will be displayed in the 'Dataframe' section. Finally, the distribution of sentiment over the dataset is shown in the 'Sentiment' scatterplot. Prompt history will display in the 'OpenAI Communication Log' box, below."
15
 
 
 
 
16
  AR_ADVISOR_PURPOSE = "This component demonstrates how a chatbot can leverage local embeddings to provide context. In this example, embeddings were created from USAA's 2022 Member Report provide organizational context for all interactions. This shows how generative AI enables any dataset to be queried and explored via natural language."
17
  AR_ADVISOR_DIRECTIONS = "To get started, simply start asking questions. This bot has been trainged on USAA's 2022 Member Report and can answer questions about any content from the report. You can ask questions like: 'What's the best thing that happened last year?' or 'What was the biggest obstancle faced in 2022?'"
18
 
@@ -102,12 +110,13 @@ QUESTIONS_AR_EXPERT="How was 2022? What was one of the biggest challenges? What
102
 
103
  #tab navigator labels
104
  UI_1="Prompt Builder"
105
- UI_2="Summary Extraction"
106
  UI_3="Image Creation"
107
  UI_4="Recording Analysis"
108
  UI_5="Slack Sentiment Analysis"
109
  UI_6="Image to Text"
110
  UI_7="PDF Query Chatbot"
 
111
 
112
  HR_BENEFIT_PKL = 'utilities/data/embeddings/hr_asst_embeddings.pkl'
113
  HR_BENEFIT_CSV = 'utilities/data/embeddings/hr_asst_data.csv'
@@ -135,15 +144,14 @@ SUMMARIZATION="Summarization"
135
  SUMMARIZE="Summarize"
136
  SUMMARIZATION_TRUNCATION="***Code was truncated. Only first 14000 characters are included in the summary.***\n\n"
137
  CHAR_COUNT="\n\nNum Characters: "
138
-
 
 
139
  FILE_DOES_NOT_EXIST="does not exist."
140
  ORIGINAL_DOC="Original Document"
141
  SUMMARIZE_INFO="Pick a document to summarize."
142
  CODE_DOC_EXAMPLES_MD="### Code & Document Examples"
143
 
144
- SUMMARY_MD_HEAD="## Summary"
145
- SUMMARY_MD_SUBHEAD="##### Understand legacy software, complex or technical documents with summarization."
146
- SUMMARY_MD_DIRECTIONS="Enter text or select one of the Code & Document Examples, below. Then tap Summarize."
147
 
148
  IMAGE_PROMPT="Image Prompt"
149
  IMAGE_PLACEHOLDER="Enter a description of what you'd like to see."
 
1
+
2
+ SUMMARY_MD_HEAD="## Summary"
3
+ SUMMARY_MD_SUBHEAD="##### Understand legacy software, complex or technical documents with summarization."
4
+ SUMMARY_MD_DIRECTIONS="Enter text or select one of the Code & Document Examples, below. Then tap Summarize."
5
+
6
  PROMPT_DESCRIPTION="Explore how generative AI can be used to create content. Personas let you explore the variety of roles the system can take on. Translation options specify the language for your response. Voice options are an assortment of AI voices that will bring messages to life. Check the Sentiment box to see how the system identifies sentiment from conversation. Check the emotion to see how the sytem detects your emotion."
7
  PROMPT_DIRECTIONS="To get started, select a Persona, Language, and Voice. If you'd like Sentiment or Emotion analysis, check the respective box. Tap the 'Record from microphone' button and say what's on your mind, tap 'Stop recording' when your done. If you're happy with what you've said, tap 'Submit' to send your request, otherwise tap 'Clear' to record a new message."
8
 
 
18
  SLACK_SENTIMENT_ANALYSIS_PURPOSE="Generative AI can help you understand the sentiment of your Slack messages. The 'Analyze Sentiment' action parses Slack content and uses OpenAI Chat Completion to score the sentiment of each message. The 'Sentiment' scatterplot shows the distribution of sentiment over the dataset."
19
  SLACK_SENTIMENT_ANALYSIS_DIRECTIONS="To get started select one of the slack data options in 'Slack Examples' below, you'll see the Slack content in the 'Original' textbox. Next tap 'Analyze Sentiment'. ChatCompletion calls will provide sentiment scores for each message. The results will be displayed in the 'Dataframe' section. Finally, the distribution of sentiment over the dataset is shown in the 'Sentiment' scatterplot. Prompt history will display in the 'OpenAI Communication Log' box, below."
20
 
21
+ NLP_ANALYSIS_PURPOSE="Natural Language Processing (NLP) enables us to understand the structure and meaning in text. This view provides some understanding of NLP under the hood of Generative AI."
22
+ NLP_ANALYSIS_DIRECTIONS="Start by entering a sentence or paragraph in the 'Text' box. Then tap 'Analyze'. You'll see token counts, part-of-speech tags, and a visual representation of the analysis."
23
+ NLP_PROMPT="Enter text to analyze..."
24
  AR_ADVISOR_PURPOSE = "This component demonstrates how a chatbot can leverage local embeddings to provide context. In this example, embeddings were created from USAA's 2022 Member Report provide organizational context for all interactions. This shows how generative AI enables any dataset to be queried and explored via natural language."
25
  AR_ADVISOR_DIRECTIONS = "To get started, simply start asking questions. This bot has been trainged on USAA's 2022 Member Report and can answer questions about any content from the report. You can ask questions like: 'What's the best thing that happened last year?' or 'What was the biggest obstancle faced in 2022?'"
26
 
 
110
 
111
  #tab navigator labels
112
  UI_1="Prompt Builder"
113
+ UI_8="Summary Extraction"
114
  UI_3="Image Creation"
115
  UI_4="Recording Analysis"
116
  UI_5="Slack Sentiment Analysis"
117
  UI_6="Image to Text"
118
  UI_7="PDF Query Chatbot"
119
+ UI_2="NLP Text Analysis"
120
 
121
  HR_BENEFIT_PKL = 'utilities/data/embeddings/hr_asst_embeddings.pkl'
122
  HR_BENEFIT_CSV = 'utilities/data/embeddings/hr_asst_data.csv'
 
144
  SUMMARIZE="Summarize"
145
  SUMMARIZATION_TRUNCATION="***Code was truncated. Only first 14000 characters are included in the summary.***\n\n"
146
  CHAR_COUNT="\n\nNum Characters: "
147
+ TOKENS="Tokens"
148
+ NLP_ANALYSIS_MD="### NLP Analysis"
149
+ NLP_POS_MAP_MD="### NLP POS Map"
150
  FILE_DOES_NOT_EXIST="does not exist."
151
  ORIGINAL_DOC="Original Document"
152
  SUMMARIZE_INFO="Pick a document to summarize."
153
  CODE_DOC_EXAMPLES_MD="### Code & Document Examples"
154
 
 
 
 
155
 
156
  IMAGE_PROMPT="Image Prompt"
157
  IMAGE_PLACEHOLDER="Enter a description of what you'd like to see."