# Spaces: TGSAI / Summariser (Sleeping)
#
# File size: 2,981 Bytes


#utils.py
import openai
import os
#import easyocr
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import matplotlib.pyplot as plt
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
# Fetch the VADER lexicon used by SentimentIntensityAnalyzer only when it is
# not already available locally, so that importing this module does not
# contact the NLTK download server on every start-up.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon')

#def ocr_reader(lan, png):
#    locs = []; words = []; confidence = []
#    try:
#        reader = easyocr.Reader([str(lan)]) #Initialise Language
#        result = reader.readtext(str(png))
#        for i in result:
#            locs.append(i[0])
#            words.append(i[1])
#            confidence.append(i[2])
#        return locs, words, confidence
#    except Exception as e:
#        print(e)
        
def get_completion(prompt, model="gpt-3.5-turbo"):
    """Request a chat completion for ``prompt`` and moderate the prompt.

    Args:
        prompt: Text sent as the single user message of the chat request and
            also submitted to the moderation endpoint.
        model: Model name forwarded to ``openai.ChatCompletion.create``.

    Returns:
        A ``(content, token_dict, moderation_output)`` tuple holding the
        message content of the first choice, a dict with the
        ``prompt_tokens``, ``completion_tokens`` and ``total_tokens`` usage
        counts, and the first entry of the moderation ``results`` list.
    """
    chat = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,  # degree of randomness of the model's output
    )
    content = chat.choices[0].message["content"]

    # Copy only the three usage counters, in a fixed key order.
    usage = chat['usage']
    usage_keys = ('prompt_tokens', 'completion_tokens', 'total_tokens')
    token_dict = {key: usage[key] for key in usage_keys}

    # The moderation request is issued after the completion, on the raw prompt.
    moderation = openai.Moderation.create(input=prompt)
    moderation_output = moderation["results"][0]
    return content, token_dict, moderation_output

def get_radar(df):
    """Build a filled radar (polar) chart from the ``category_scores`` column.

    Note that a ``names`` column holding the index values is added to ``df``
    in place; those values label the angular axis.

    Args:
        df: DataFrame with a ``category_scores`` column, one row per
            category, indexed by category label (presumably the moderation
            result returned by ``get_completion`` -- confirm against caller).

    Returns:
        A ``plotly.graph_objects.Figure`` with one ``Scatterpolar`` trace
        named "Moderation", radial axis fixed to [0, 1], legend hidden.
    """
    df['names'] = df.index

    fig = go.Figure()
    radar_trace = go.Scatterpolar(
        r=df.category_scores,
        theta=df.names,
        fill='toself',
        name='Moderation',
    )
    fig.add_trace(radar_trace)

    # Pin the radial range so charts from different inputs stay comparable.
    radial_axis = {"visible": True, "range": [0, 1]}
    fig.update_layout(polar={"radialaxis": radial_axis}, showlegend=False)
    return fig

def plot_wordcloud(wc, figsize=(12, 8)):
    """Draw a word cloud image on a new Matplotlib figure with hidden axes.

    Args:
        wc: Image-like object accepted by ``Axes.imshow`` (presumably a
            generated ``wordcloud.WordCloud`` -- confirm against caller).
        figsize: ``(width, height)`` of the figure in inches, forwarded to
            ``plt.subplots``. Defaults to the previously hard-coded (12, 8).

    Returns:
        The Matplotlib ``Figure`` containing the rendered image.
    """
    fig, ax = plt.subplots(figsize=figsize)
    ax.imshow(wc, interpolation="bilinear")
    # Turn the axes off on the Axes we created instead of relying on pyplot's
    # implicit "current axes", which is global state shared across callers.
    ax.axis("off")
    return fig

def get_sentiment(text):
    """Score each sentence of ``text`` with VADER and plot the results.

    The text is split on ``'.'``; whitespace inside each fragment is
    collapsed and empty fragments (for example the one following a trailing
    period) are discarded, because VADER scores an empty string as all zeros
    and it would otherwise be labelled "negative" with confidence 0.

    Args:
        text: Input text to analyse.

    Returns:
        A ``(df, fig)`` tuple. ``df`` has one row per non-empty sentence with
        the columns ``content``, ``negative``, ``neutral``, ``positive``,
        ``compound``, ``confidence`` (the largest of the three class scores)
        and ``sentiment`` (the name of that class; on ties the first of
        negative/neutral/positive wins). ``fig`` is a Plotly scatter figure
        of the sentences coloured by sentiment and sized by confidence.
    """
    fragments = (' '.join(part.split()) for part in text.split('.'))
    sentences = [sentence for sentence in fragments if sentence]

    sid = SentimentIntensityAnalyzer()
    # Build the score columns directly with fixed names and float dtype so
    # the frame keeps its schema even when there are no sentences at all.
    scores = pd.DataFrame(
        [sid.polarity_scores(sentence) for sentence in sentences],
        columns=['neg', 'neu', 'pos', 'compound'],
        dtype=float,
    )
    df = pd.concat([pd.DataFrame(sentences, columns=['content']), scores], axis=1)
    df = df.rename(columns={'neu': 'neutral', 'neg': 'negative', 'pos': 'positive'})

    class_columns = ["negative", "neutral", "positive"]
    df['confidence'] = df[class_columns].max(axis=1)
    df['sentiment'] = df[class_columns].idxmax(axis=1)

    fig = px.scatter(
        df,
        y='sentiment',
        color='sentiment',
        size='confidence',
        hover_data=['content'],
        color_discrete_map={"negative": "firebrick", "neutral": "navajowhite", "positive": "darkgreen"},
    )
    fig.update_layout(width=800, height=300)
    return df, fig