Spaces:
Runtime error
Runtime error
| from gcode import predict_text_classification_single_label_sample | |
| import pandas as pd | |
| from dotenv import load_dotenv | |
| import os | |
| from quickchart import QuickChart | |
| load_dotenv() | |
# Maps each bias label to its internal bias key and the Vertex endpoint ID,
# which is read from the environment variable named in the third column.
list_of_biases_and_endpts = {
    label: {"bias_type": bias_key, "endpoint_id": os.environ.get(env_name)}
    for label, bias_key, env_name in (
        ("Gender Bias", "gender_bias", "GENDER_ENDPOINT_ID"),
        ("Racial Bias", "racial_bias", "RACIAL_ENDPOINT_ID"),
        ("Political Bias", "political_bias", "POLITICAL_ENDPOINT_ID"),
        ("Hate Speech", "hate_speech", "HATE_ENDPOINT_ID"),
    )
}

# Position of the "biased" confidence score inside each model's
# 'confidences' list (0 -> first entry is the bias score, 1 -> second entry).
order_in_confidence = dict(
    gender_bias=0,
    racial_bias=1,
    political_bias=0,
    hate_speech=0,
)
# make_preds is the top-level entry point: it delegates scoring to predict()
# and chart rendering to generateChart().
def make_preds(content, bias_type):
    """Run one bias check over ``content`` and build the three UI outputs.

    Args:
        content: The text to analyse.
        bias_type: A key of ``list_of_biases_and_endpts`` (e.g. "Gender Bias").

    Returns:
        A tuple ``(pos_tokens, bias_scores, html)``:
        - pos_tokens: list of ``(text, label)`` tuples for the highlightText
          component; ``label`` is ``bias_type`` when the score is above 0.5,
          otherwise ``None``. Each chunk is followed by a ``(" ", None)`` spacer.
        - bias_scores: ``{bias_type: bias_percentage}`` for the bar chart.
        - html: the gauge chart markup returned by ``generateChart``.
    """
    entry = list_of_biases_and_endpts[bias_type]
    frame, share = predict(content, entry["bias_type"], entry["endpoint_id"])

    # Build the highlight data: one labelled tuple per scored row plus a spacer.
    highlighted = []
    for row_label in frame.index:
        chunk_text = frame['content'][row_label]
        chunk_score = frame['predictions'][row_label]
        tag = bias_type if chunk_score > 0.5 else None
        highlighted.append((chunk_text, tag))
        highlighted.append((" ", None))

    # QuickChart-based radial gauge showing the overall bias percentage.
    gauge_html = generateChart(share)

    return highlighted, {bias_type: share}, gauge_html
# predict generates the per-sentence predictions for the user content.
def predict(content, bias_type, endpoint_id):
    """Score every sentence of ``content`` against one bias endpoint.

    The text is split with ``split_into_sentences``; each sentence is sent to
    ``predict_text_classification_single_label_sample`` and the confidence at
    index ``order_in_confidence[bias_type]`` is recorded as its bias score.
    The resulting table is also written to ``preds_<bias_type>.csv`` in the
    current working directory.

    Args:
        content: The text to analyse.
        bias_type: Internal bias key (a key of ``order_in_confidence``).
        endpoint_id: ID of the endpoint to query.

    Returns:
        ``(df, bias_percentage)`` where ``df`` has the columns 'content' and
        'predictions' (the bias confidence score), and ``bias_percentage`` is
        the fraction of scores above 0.5 relative to the number of sentences,
        rounded to two decimals. It is 0.0 when ``content`` contains no
        sentences.
    """
    chunks = split_into_sentences(content)
    confidence_index = order_in_confidence[bias_type]
    project_id = os.environ.get("PROJECT_ID")

    rows = []
    biased_count = 0
    # A distinct loop name keeps the ``content`` argument intact.
    for chunk in chunks:
        predictions = predict_text_classification_single_label_sample(
            project=project_id,
            endpoint_id=endpoint_id,
            location="us-central1",
            content=chunk,
        )
        for prediction in predictions:
            score = float(dict(prediction)['confidences'][confidence_index])
            rows.append({'content': chunk, 'predictions': score})
            if score > 0.5:
                biased_count += 1

    # Build the frame once instead of growing it row by row.
    df = pd.DataFrame(rows, columns=['content', 'predictions'])

    # save the dataframe as a csv file
    df.to_csv(f'preds_{bias_type}.csv')

    # Empty or whitespace-only input yields no chunks; avoid dividing by zero.
    bias_percentage = round(biased_count / len(chunks), 2) if chunks else 0.0
    return df, bias_percentage
# Splits the content into consecutive chunks of at most 20 words.
def split_into_20_word_chunks(long_string):
    """Return ``long_string`` cut into space-joined groups of up to 20 words.

    Words are whatever ``str.split()`` yields, so any run of whitespace
    separates words and the chunks are re-joined with single spaces.
    An input without words gives an empty list.
    """
    window = 20
    tokens = long_string.split()
    return [
        ' '.join(tokens[start:start + window])
        for start in range(0, len(tokens), window)
    ]
# Splits the content into sentences.
def split_into_sentences(long_string):
    """Split ``long_string`` into sentences ending in '.', '?' or '!'.

    A sentence ends at a run of one or more terminators, so "Wait..." and
    "Really?!" each stay a single sentence instead of producing stray
    one-character entries. Surrounding whitespace is stripped from each
    sentence, and pieces that are empty after stripping (for example the
    newline after the final period) are dropped. Text after the last
    terminator is returned as a final sentence.

    Args:
        long_string: The text to split.

    Returns:
        A list of non-empty sentence strings in their original order.
    """
    punctuation_marks = {'.', '?', '!'}
    sentences = []
    length = len(long_string)
    start = 0
    pos = 0
    while pos < length:
        if long_string[pos] in punctuation_marks:
            # Absorb the whole run of terminators ("...", "?!") into this sentence.
            while pos + 1 < length and long_string[pos + 1] in punctuation_marks:
                pos += 1
            sentence = long_string[start:pos + 1].strip()
            if sentence:
                sentences.append(sentence)
            start = pos + 1
        pos += 1

    # Keep trailing text without a terminator, but never an empty string.
    remainder = long_string[start:].strip()
    if remainder:
        sentences.append(remainder)
    return sentences
# generateChart creates the circular bias-percentage chart using the
# QuickChart library.
def generateChart(bias_percentage):
    """Return HTML embedding a radial gauge for ``bias_percentage``.

    Args:
        bias_percentage: Fraction to display; it is multiplied by 100 and
            rounded before being placed in the chart config.

    Returns:
        A ``<div>`` string wrapping an ``<img>`` whose ``src`` is the URL
        produced by ``QuickChart.get_url()``.
    """
    gauge_value = str(round(bias_percentage*100, 0))

    # The config is passed as a string; the value is spliced in between
    # the two fixed halves.
    config_head = """{
type: 'radialGauge',
data: {
datasets: [{
data: ["""
    config_tail = """],
backgroundColor: getGradientFillHelper('horizontal', ['red', 'blue']),
}]
},
options: {
// See https://github.com/pandameister/chartjs-chart-radial-gauge#options
domain: [0, 100],
trackColor: '#f0f8ff',
centerPercentage: 90,
centerArea: {
text: (val) => val + '%',
},
}
}"""

    chart = QuickChart()
    chart.width = 500
    chart.height = 300
    chart.version = '2'
    chart.config = config_head + gauge_value + config_tail

    chart_url = chart.get_url()
    image_tag = f"""<img src="{chart_url}"/>"""
    return "".join((
        "<div style='max-width:100%; max-height:360px; overflow:auto'>",
        image_tag,
        "</div>",
    ))