Spaces:
Sleeping
Sleeping
| #utils.py | |
| import openai | |
| import os | |
| #import easyocr | |
| import pandas as pd | |
| import plotly.graph_objects as go | |
| import plotly.express as px | |
| import matplotlib.pyplot as plt | |
| from nltk.sentiment.vader import SentimentIntensityAnalyzer | |
| import nltk | |
| nltk.download('vader_lexicon') | |
| #def ocr_reader(lan, png): | |
| # locs = []; words = []; confidence = [] | |
| # try: | |
| # reader = easyocr.Reader([str(lan)]) #Initialise Language | |
| # result = reader.readtext(str(png)) | |
| # for i in result: | |
| # locs.append(i[0]) | |
| # words.append(i[1]) | |
| # confidence.append(i[2]) | |
| # return locs, words, confidence | |
| # except Exception as e: | |
| # print(e) | |
def get_completion(prompt, model="gpt-3.5-turbo"):
    """Ask a chat model to answer *prompt* and moderate the prompt itself.

    The prompt is sent as a single ``user`` message with ``temperature=0``.
    After the completion returns, the same prompt is passed to the
    moderation endpoint.

    Args:
        prompt: Text sent both to the chat model and to moderation.
        model: Chat model name forwarded to ``openai.ChatCompletion.create``.

    Returns:
        A 3-tuple ``(content, token_dict, moderation_output)``:
        ``content`` is the message content of the first choice,
        ``token_dict`` holds the ``prompt_tokens``, ``completion_tokens`` and
        ``total_tokens`` values read from the response's ``usage`` entry, and
        ``moderation_output`` is the first item of the moderation ``results``.
    """
    response = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,  # degree of randomness of the model's output
    )
    content = response.choices[0].message["content"]

    # Copy only the three usage counters the callers receive.
    usage_keys = ('prompt_tokens', 'completion_tokens', 'total_tokens')
    token_dict = {key: response['usage'][key] for key in usage_keys}

    moderation_response = openai.Moderation.create(input=prompt)
    moderation_output = moderation_response["results"][0]
    return content, token_dict, moderation_output
def get_radar(df):
    """Build a filled radar (polar) chart from a score DataFrame.

    Args:
        df: DataFrame exposing a ``category_scores`` column; its index
            supplies the angular labels. Presumably this is the moderation
            result in tabular form -- confirm against the caller.

    Returns:
        A plotly ``Figure`` with one ``Scatterpolar`` trace named
        ``'Moderation'``, a visible radial axis fixed to ``[0, 1]`` and the
        legend hidden.

    Note:
        A ``names`` column (a copy of the index) is written onto ``df``
        itself, so the caller's DataFrame is modified.
    """
    # Written onto the caller's frame; kept because callers may read it.
    df['names'] = df.index

    moderation_trace = go.Scatterpolar(
        r=df.category_scores,
        theta=df.names,
        fill='toself',
        name='Moderation',
    )

    radar = go.Figure()
    radar.add_trace(moderation_trace)

    radial_axis = dict(visible=True, range=[0, 1])
    radar.update_layout(
        polar=dict(radialaxis=radial_axis),
        showlegend=False,
    )
    return radar
def plot_wordcloud(wc):
    """Draw *wc* on a new 12x8 inch matplotlib figure with the axes hidden.

    Args:
        wc: Image-like object accepted by ``Axes.imshow`` (presumably a
            ``wordcloud.WordCloud`` instance -- confirm against the caller).

    Returns:
        The matplotlib ``Figure`` holding the rendered image.
    """
    fig, ax = plt.subplots(figsize=(12, 8))
    ax.imshow(wc, interpolation="bilinear")
    # Hide the axes on the Axes created above instead of going through
    # pyplot's implicit "current axes", which is process-global state.
    ax.axis("off")
    return fig
def get_sentiment(text):
    """Score each sentence of *text* with VADER and plot the outcome.

    The text is split on ``'.'``, whitespace inside every fragment is
    collapsed to single spaces, and each non-blank fragment is scored with
    ``SentimentIntensityAnalyzer.polarity_scores``.

    Args:
        text: String to analyse.

    Returns:
        A ``(df, fig)`` tuple. ``df`` has one row per sentence: ``content``,
        the VADER scores with ``neg``/``neu``/``pos`` renamed to
        ``negative``/``neutral``/``positive`` (any other score columns are
        kept as returned), ``confidence`` (the largest of those three) and
        ``sentiment`` (the name of the column holding that largest value).
        ``fig`` is a plotly express scatter of ``sentiment`` coloured by
        label and sized by ``confidence``, 800x300 pixels.
    """
    # Collapse whitespace runs inside each '.'-delimited fragment.
    fragments = [' '.join(part.split()) for part in text.split('.')]
    # Blank fragments (trailing period, "..") contain no words to score and
    # would otherwise show up as spurious rows. If nothing is left (empty or
    # dots-only text) keep one empty sentence so the frame still carries the
    # score columns the code below relies on.
    sentences = [fragment for fragment in fragments if fragment] or ['']

    sid = SentimentIntensityAnalyzer()
    # One dict of scores per sentence -> one column per score.
    scores = pd.DataFrame([sid.polarity_scores(sentence) for sentence in sentences])

    df = pd.concat([pd.DataFrame(sentences, columns=['content']), scores], axis=1)
    df = df.rename(columns={'neu': 'neutral', 'neg': 'negative', 'pos': 'positive'})

    label_columns = ["negative", "neutral", "positive"]
    df['confidence'] = df[label_columns].max(axis=1)
    df['sentiment'] = df[label_columns].idxmax(axis=1)

    fig = px.scatter(df, y='sentiment', color='sentiment', size='confidence',
                     hover_data=['content'],
                     color_discrete_map={"negative": "firebrick", "neutral": "navajowhite", "positive": "darkgreen"})
    fig.update_layout(width=800, height=300)
    return df, fig