| import os |
| import string |
| import numpy as np |
| import matplotlib.pyplot as plt |
| from sklearn.metrics import confusion_matrix |
| import seaborn as sns |
| import streamlit as st |
| import base64 |
|
|
|
|
| def get_lines(txt_url): |
| """ |
| Read data txt file |
| """ |
| with open(txt_url, "r") as f: |
| return f.readlines() |
|
|
|
|
|
|
| def get_information(file_dir): |
| """ |
| Create features from data |
| Each sentence in an abstract has: |
| - target: Class label |
| - text: raw text |
| - line id: Numeral order of a sentence in an abstract |
| - line_length: Length of text |
| - total_lines: Number of sentences in an abstract |
| """ |
|
|
| input_lines = get_lines(file_dir) |
|
|
| abstract_samples = [] |
| lines_in_one_abstract = "" |
| for line in input_lines: |
| |
| if line.startswith("###"): |
| abstract_id = line[4:-1] |
| one_abstract = "" |
| |
| elif line.isspace(): |
| lines_in_one_abstract = one_abstract.splitlines() |
| for line_id, line in enumerate(lines_in_one_abstract): |
| |
| line_infor = {} |
| line_infor['abstract_id'] = abstract_id |
| |
| line_split = line.split("\t") |
| line_infor['target'] = line_split[0] |
| line_infor['text'] = line_split[1].lower() |
| line_infor['line_id'] = line_id |
| line_infor['length_lines'] = len(line_split[1].split(" ")) |
| line_infor['total_lines'] = len(lines_in_one_abstract) - 1 |
| abstract_samples.append(line_infor) |
| else: |
| one_abstract += line |
| return abstract_samples |
|
|
|
|
| def get_information_infer(samples): |
| """ |
| Extract feature in inference phase |
| """ |
|
|
| total_line = len(samples) |
|
|
| sample_lines = [] |
| for id, line in enumerate(samples): |
| one_line = {} |
| one_line['text'] = str(line) |
| one_line['line_id'] = id |
| one_line['length_lines'] = len(line.split(" ")) |
| one_line['total_lines'] = total_line - 1 |
| sample_lines.append(one_line) |
| return sample_lines |
|
|
|
|
|
|
| def split_into_char(line): |
| """Split a line into char""" |
| return " ".join(list(line)) |
|
|
|
|
|
|
| def convert_to_one_hot(y_pred): |
| """ |
| Convert probability vector y_pred into one-hot vector |
| """ |
| for row in range(len(y_pred)): |
| max_index = np.argmax(y_pred[row]) |
| y_pred[row] = np.zeros((1, len(y_pred[row]))) |
| y_pred[row][max_index] = 1 |
| return y_pred |
|
|
|
|
|
|
| def convert_to_one_hot(y_pred): |
| """ |
| Convert probability vector into one-hot vector |
| """ |
| for row in range(len(y_pred)): |
| max_index = np.argmax(y_pred[row]) |
| y_pred[row] = np.zeros((1, len(y_pred[row]))) |
| y_pred[row][max_index] = 1 |
| return y_pred |
|
|
|
|
|
|
| def get_confusion_matrix(y_true, y_pred, classes, |
| normalize=False, |
| title=None, |
| cmap=plt.cm.Blues): |
| """ |
| Get confusion matrix of model predictio |
| Normalization can be applied by setting `normalize=True`. |
| """ |
| if not title: |
| if normalize: |
| title = 'Normalized confusion matrix' |
| else: |
| title = 'Confusion matrix, without normalization' |
|
|
| |
| cm = confusion_matrix(y_true, y_pred) |
|
|
| if normalize: |
| cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] |
|
|
| plt.figure(figsize=(8, 6)) |
| sns.heatmap(cm, annot=True, fmt=".2f" if normalize else "d", cmap=cmap, square=True, |
| xticklabels=classes, yticklabels=classes) |
| plt.title(title) |
| plt.xlabel('Predicted label') |
| plt.ylabel('True label') |
|
|
| plt.show() |
|
|
|
|
|
|
| |
|
|
| |
| def get_base64(bin_file): |
| with open(bin_file, 'rb') as f: |
| data = f.read() |
| return base64.b64encode(data).decode() |
|
|
|
|
| |
| def set_background(png_file): |
| bin_str = get_base64(png_file) |
| page_bg_img = ''' |
| <style> |
| .stApp { |
| background-image: url("data:image/png;base64,%s"); |
| background-size: cover; |
| } |
| </style> |
| ''' % bin_str |
| st.markdown(page_bg_img, unsafe_allow_html=True) |
|
|
|
|
| def get_block(custom_text, back_ground): |
| block = f""" |
| <div style=" |
| {back_ground} |
| border-radius: 6px; |
| min-height: 80px; |
| padding: 20px; |
| --shadow: 1px 1px 1px 1px rgb(0 0 0 / 0.25); |
| box-shadow: var(--shadow); |
| border-radius: 25px; |
| box-sizing: border-box; |
| text-align: justify; |
| "> |
| <h4 style="color:#ffffff; font-family: cursive; font-size: 16px; font-weight: 200"> |
| {custom_text}</h4> |
| </div> |
| </br>""" |
| st.markdown(block, unsafe_allow_html=True) |