MrJShen committed on
Commit
4f15ff1
·
1 Parent(s): c7dea47

Upload utils.py

Browse files

Utility functions

Files changed (1) hide show
  1. utils.py +83 -0
utils.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ #utils.py
3
+ import openai
4
+ import os
5
+ import easyocr
6
+ import pandas as pd
7
+ import plotly.graph_objects as go
8
+ import plotly.express as px
9
+ import matplotlib.pyplot as plt
10
+ from nltk.sentiment.vader import SentimentIntensityAnalyzer
11
+ import nltk
12
+ nltk.download('vader_lexicon')
13
+
14
def ocr_reader(lan, png):
    """Run EasyOCR on an image and return its detections as three parallel lists.

    Args:
        lan: Language code handed to ``easyocr.Reader`` (converted with ``str``).
        png: Image reference handed to ``Reader.readtext`` (converted with
            ``str``).

    Returns:
        A ``(locs, words, confidence)`` tuple of lists holding, respectively,
        the first, second and third element of every detection returned by
        ``readtext``. If anything raises, the exception is printed and
        ``None`` is returned instead of a tuple, so callers must check for
        ``None`` before unpacking.
    """
    try:
        detections = easyocr.Reader([str(lan)]).readtext(str(png))
        # Pull the three fields out of each detection in one pass.
        rows = [(hit[0], hit[1], hit[2]) for hit in detections]
    except Exception as err:
        # Best-effort helper: report the problem and signal failure with None.
        print(err)
        return None

    boxes = [row[0] for row in rows]
    texts = [row[1] for row in rows]
    scores = [row[2] for row in rows]
    return boxes, texts, scores
26
+
27
def get_completion(prompt, model="gpt-3.5-turbo"):
    """Send a single-turn prompt to the chat model and moderate the prompt.

    Args:
        prompt: User message text; sent as the only message of the chat and
            also submitted to the moderation endpoint.
        model: Chat model name passed to ``openai.ChatCompletion.create``.

    Returns:
        A ``(content, token_dict, moderation_output)`` tuple:
        ``content`` is the message content of the first choice,
        ``token_dict`` holds the ``prompt_tokens``, ``completion_tokens`` and
        ``total_tokens`` values from the response's ``usage`` section, and
        ``moderation_output`` is the first entry of the moderation
        ``results`` for the prompt (not for the model's reply).
    """
    response = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,  # degree of randomness of the model's output
    )
    content = response.choices[0].message["content"]

    usage_keys = ('prompt_tokens', 'completion_tokens', 'total_tokens')
    token_dict = {key: response['usage'][key] for key in usage_keys}

    # Moderation runs after the completion, on the original prompt text.
    moderation = openai.Moderation.create(input=prompt)
    moderation_output = moderation["results"][0]
    return content, token_dict, moderation_output
42
+
43
def get_radar(df):
    """Build a radar (polar) chart from a frame of category scores.

    Args:
        df: DataFrame exposing a ``category_scores`` column. Its index labels
            are used as the angular categories of the chart. The frame is
            only read; no helper column is written back into it.

    Returns:
        A plotly ``Figure`` with one filled ``Scatterpolar`` trace named
        ``'Moderation'``, a visible radial axis fixed to ``[0, 1]`` and no
        legend.
    """
    fig = go.Figure()
    fig.add_trace(go.Scatterpolar(
        r=df.category_scores,
        # Take the labels straight from the index rather than adding a
        # 'names' column, which would mutate the caller's DataFrame.
        theta=df.index,
        fill='toself',
        name='Moderation'))
    fig.update_layout(
        polar=dict(radialaxis=dict(
            visible=True,
            range=[0, 1])),
        showlegend=False)

    return fig
58
+
59
def plot_wordcloud(wc):
    """Draw a word-cloud image on a fresh 12x8 matplotlib figure.

    Args:
        wc: Image-like object accepted by ``Axes.imshow`` (presumably a
            ``wordcloud.WordCloud`` instance; confirm against the caller).

    Returns:
        The matplotlib ``Figure`` containing the image with its axes hidden.
    """
    figure, axes = plt.subplots(figsize=(12, 8))
    axes.imshow(wc, interpolation="bilinear")
    # The freshly created axes are the current axes, so hide them directly.
    axes.axis("off")
    return figure
64
+
65
def get_sentiment(text):
    """Score every sentence of ``text`` with VADER and chart the result.

    The text is split on ``'.'`` and runs of whitespace inside each piece are
    collapsed to single spaces. Pieces that end up empty (after a trailing
    period, between consecutive periods, or for blank input) are skipped:
    VADER scores an empty string as all zeros, and ``idxmax`` would then
    label that row "negative".

    Args:
        text: The text to analyse.

    Returns:
        A ``(df, fig)`` tuple. ``df`` has one row per non-empty sentence with
        the columns ``content``, ``negative``, ``neutral``, ``positive``,
        ``compound``, ``confidence`` (the largest of the three class scores)
        and ``sentiment`` (the name of that class). ``fig`` is an 800x300
        plotly scatter of the sentences coloured by sentiment and sized by
        confidence; when there are no sentences it is an empty figure of the
        same size.
    """
    labels = ["negative", "neutral", "positive"]
    columns = ['content', *labels, 'compound', 'confidence', 'sentiment']

    pieces = (' '.join(piece.split()) for piece in text.split('.'))
    sentences = [piece for piece in pieces if piece]

    if not sentences:
        # Nothing to score: return the usual schema and an empty chart.
        fig = go.Figure()
        fig.update_layout(width=800, height=300)
        return pd.DataFrame(columns=columns), fig

    sid = SentimentIntensityAnalyzer()
    scores = pd.DataFrame(
        [sid.polarity_scores(sentence) for sentence in sentences],
        columns=['neg', 'neu', 'pos', 'compound'],
    ).rename(columns={'neu': 'neutral', 'neg': 'negative', 'pos': 'positive'})

    df = pd.concat([pd.DataFrame({'content': sentences}), scores], axis=1)
    df['confidence'] = df[labels].max(axis=1)
    df['sentiment'] = df[labels].idxmax(axis=1)

    fig = px.scatter(
        df, y='sentiment', color='sentiment', size='confidence',
        hover_data=['content'],
        color_discrete_map={"negative": "firebrick", "neutral": "navajowhite", "positive": "darkgreen"})
    fig.update_layout(width=800, height=300)
    return df, fig
82
+
83
+