Summariser / utils.py
MrJShen's picture
Update utils.py
c973b79
#utils.py
import openai
import os
#import easyocr
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import matplotlib.pyplot as plt
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
# Runs at import time: fetch the 'vader_lexicon' NLTK resource, presumably the
# data SentimentIntensityAnalyzer (used in get_sentiment) loads — confirm.
nltk.download('vader_lexicon')
#def ocr_reader(lan, png):
# locs = []; words = []; confidence = []
# try:
# reader = easyocr.Reader([str(lan)]) #Initialise Language
# result = reader.readtext(str(png))
# for i in result:
# locs.append(i[0])
# words.append(i[1])
# confidence.append(i[2])
# return locs, words, confidence
# except Exception as e:
# print(e)
def get_completion(prompt, model="gpt-3.5-turbo"):
    """Send ``prompt`` as a single user message and return the reply with metadata.

    Args:
        prompt: Text used as the ``content`` of the one user message. The same
            text is also submitted to ``openai.Moderation.create``.
        model: Model name forwarded to ``openai.ChatCompletion.create``.

    Returns:
        A 3-tuple ``(content, token_dict, moderation_output)``:
        ``content`` is the message content of the first choice;
        ``token_dict`` holds the ``prompt_tokens``, ``completion_tokens`` and
        ``total_tokens`` entries of the response's ``usage`` section;
        ``moderation_output`` is the first entry of the moderation ``results``.
    """
    chat_response = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,  # temperature is the degree of randomness of the output
    )
    content = chat_response.choices[0].message["content"]

    usage = chat_response['usage']
    token_dict = {
        field: usage[field]
        for field in ('prompt_tokens', 'completion_tokens', 'total_tokens')
    }

    # The moderation request is issued after the completion, on the raw prompt.
    moderation_response = openai.Moderation.create(input=prompt)
    moderation_output = moderation_response["results"][0]
    return content, token_dict, moderation_output
def get_radar(df):
    """Build a filled radar (polar) chart from ``df``'s ``category_scores`` column.

    The index of ``df`` supplies the angular labels. The index is copied into
    a ``names`` column on ``df`` itself, so the caller's frame is modified.

    Args:
        df: Frame with a ``category_scores`` column; presumably the moderation
            result indexed by category name — confirm against the caller.

    Returns:
        A plotly ``Figure`` holding one ``Scatterpolar`` trace, with the radial
        axis visible and fixed to the range [0, 1] and the legend hidden.
    """
    df['names'] = df.index

    moderation_trace = go.Scatterpolar(
        r=df['category_scores'],
        theta=df['names'],
        fill='toself',
        name='Moderation',
    )
    radial_axis = {'visible': True, 'range': [0, 1]}

    fig = go.Figure()
    fig.add_trace(moderation_trace)
    fig.update_layout(polar={'radialaxis': radial_axis}, showlegend=False)
    return fig
def plot_wordcloud(wc):
    """Draw ``wc`` as an image on a new 12x8 figure with the axes hidden.

    Args:
        wc: Image-like object accepted by ``Axes.imshow`` (presumably a
            generated word cloud — confirm against the caller).

    Returns:
        The newly created matplotlib figure.
    """
    figure, axes = plt.subplots(figsize=(12, 8))
    axes.imshow(wc, interpolation="bilinear")
    # The axes created above are also pyplot's current axes, so hiding them
    # directly has the same effect as going through pyplot.
    axes.axis("off")
    return figure
def get_sentiment(text):
    """Score each '.'-delimited sentence of ``text`` with VADER and plot the result.

    The text is split on ``'.'`` and runs of whitespace inside each fragment
    are collapsed to single spaces. Empty fragments (for example the one after
    a final period) are dropped so they are not scored as all-zero rows and
    then labelled ``negative``. If nothing remains, a single empty sentence is
    kept so the returned frame and figure keep their usual shape.

    Args:
        text: The string to analyse.

    Returns:
        A 2-tuple ``(df, fig)``. ``df`` has one row per sentence with a
        ``content`` column, the ``polarity_scores`` values (``neg``/``neu``/
        ``pos`` renamed to ``negative``/``neutral``/``positive``, any other
        keys kept under their own names), ``confidence`` (the largest of the
        three renamed scores) and ``sentiment`` (the name of that largest
        score). ``fig`` is a plotly express scatter of ``sentiment``, coloured
        by label and sized by ``confidence``.
    """
    labels = ["negative", "neutral", "positive"]

    # Collapse internal whitespace; str.split() with no argument also trims.
    fragments = (' '.join(part.split()) for part in text.split('.'))
    # Fall back to one empty sentence so blank input does not yield an empty frame.
    sentences = [fragment for fragment in fragments if fragment] or ['']

    sid = SentimentIntensityAnalyzer()
    scores = pd.DataFrame([sid.polarity_scores(sentence) for sentence in sentences])

    df = pd.concat([pd.DataFrame({'content': sentences}), scores], axis=1)
    df = df.rename(columns={'neu': 'neutral', 'neg': 'negative', 'pos': 'positive'})
    df['confidence'] = df[labels].max(axis=1)
    # On ties idxmax returns the first column in ``labels`` order.
    df['sentiment'] = df[labels].idxmax(axis=1)

    fig = px.scatter(df, y='sentiment', color='sentiment', size='confidence',
                     hover_data=['content'],
                     color_discrete_map={"negative": "firebrick", "neutral": "navajowhite", "positive": "darkgreen"})
    fig.update_layout(width=800, height=300)
    return df, fig