# IgnoreLee's picture
# first check commit
# b20898d
import os
import numpy as np
import pandas as pd
import gradio as gr
import matplotlib as mpl
import matplotlib.pyplot as plt
from collections import Counter
from urllib.parse import quote
from huggingface_hub import hf_hub_download
from datasets import load_dataset
from datasets import Dataset
from huggingface_hub import HfApi, HfFolder
import logging
# Root logger at DEBUG for the whole app.
# NOTE(review): check_grade logs full grade tables at DEBUG level — consider a
# higher level for deployments where logs are visible to others.
logging.basicConfig(level=logging.DEBUG)

space_name = 'Grade_Dataset'

# Hugging Face access token, taken from the environment (e.g. a Space secret
# named HF_TOKEN) so that no credential lives in the source file. It is None
# when the variable is unset.
# NOTE(review): the token that used to be hard-coded here has been exposed in
# source control and should be revoked.
access_token = os.environ.get("HF_TOKEN")

DATA_PATH = "https://huggingface.co/datasets/IgnoreLee/Grade_Dataset/tree/main"
# DATA_PATH is a URL, so join with "/" explicitly; os.path.join would insert a
# backslash on Windows.
IMG_PATH = f"{DATA_PATH}/img"

# Course names accepted by check_grade; each one is also used as the stem of
# the "<course>.csv" file requested from the dataset repo.
# NOTE(review): these literals look like mis-decoded Korean text — confirm
# against the real CSV file names before changing them.
CLASS_LST = ["μ§€λŠ₯ν˜•μ‹œμŠ€ν…œ(κ°€)", "μ§€λŠ₯ν˜•μ‹œμŠ€ν…œ(λ‚˜)"]
def _get_csv_file(name, repo_id="IgnoreLee/Grade_Dataset"):
    """Download ``<name>.csv`` from a Hugging Face dataset repository.

    Args:
        name: File stem; ``.csv`` is appended to form the file name.
        repo_id: Dataset repository to download from.

    Returns:
        The value returned by ``hf_hub_download`` for the requested file.
    """
    # Reuse the module-level token instead of embedding a second copy of the
    # credential in this function.
    return hf_hub_download(
        repo_id=repo_id,
        filename=f"{name}.csv",
        repo_type="dataset",
        token=access_token,
    )
# Characters that encoder() copies through unchanged instead of handing them
# to urllib's quote().
_ENCODER_PASSTHROUGH = frozenset("()_-.,~/;\\ ")


def encoder(strings):
    """Percent-encode ``strings`` element by element, keeping some punctuation.

    Each element (each character, when ``strings`` is a ``str``) is copied
    as-is if it is one of ``( ) _ - . , ~ / ; \\`` or a space; every other
    element is passed through ``urllib.parse.quote``. Digits and ASCII letters
    are left untouched by ``quote`` itself, so they need no special case (the
    previous implementation listed the *integers* 0-8, which could never match
    a character).

    Args:
        strings: Text to encode.

    Returns:
        The encoded text as a single ``str``.
    """
    return "".join(
        item if item in _ENCODER_PASSTHROUGH else quote(item)
        for item in strings
    )
def draw_figure(count_dict: dict, mean, median) -> np.ndarray:
    """Render the overall score distribution and return it as an image array.

    Args:
        count_dict: Mapping of total score -> number of people with that
            total; drawn as a bar chart.
        mean: Array-like with a ``sum()`` method; ``mean.sum()`` is drawn as a
            dotted vertical line and shown in the legend.
        median: Accepted for interface compatibility; currently not drawn.

    Returns:
        The rendered figure as an RGBA pixel array.
    """
    # font_path = os.path.join(".","font","NanumGothic.ttf")
    # font = mpl.font_manager.FontProperties(fname=font_path).get_name()
    # mpl.rc('font', family=font)
    fig = plt.figure()
    try:
        # Draw on an explicit Axes rather than through pyplot's implicit
        # "current figure".
        ax = fig.add_subplot(111)
        scores = list(count_dict.keys())
        counts = list(count_dict.values())
        mean_total = mean.sum()

        ax.set_title("Overall score distribution")
        ax.bar(scores, counts, width=0.4,
               label=f"Total number of people {np.sum(counts)}")
        ax.axvline(mean_total, label=f"Mean : ({mean_total:.2f})",
                   color="r", linestyle=":")
        ax.legend()
        # These must be calls: assigning to plt.xlabel / plt.ylabel replaces
        # the pyplot functions and never labels the axes.
        ax.set_xlabel("Grade")
        ax.set_ylabel("Number of Person")

        fig.canvas.draw()
        # Copy the pixels out through the public canvas API before the figure
        # is closed (requires an Agg-based canvas, as the original code did).
        return np.array(fig.canvas.buffer_rgba())
    finally:
        # Release the figure; otherwise every call leaves one open in pyplot.
        plt.close(fig)
def csv_analysis(data: pd.DataFrame):
    """Summarise a table of scores.

    Args:
        data: Frame whose values are numeric scores (one row per person).

    Returns:
        ``((mean, std, ranking), count_dict)`` where ``mean`` and ``std`` are
        the per-column mean and standard deviation, ``ranking`` holds the
        distinct row totals in descending order, and ``count_dict`` is a
        ``Counter`` mapping each row total to how many rows have it.
    """
    scores = data.values
    row_totals = scores.sum(axis=1)

    column_mean = scores.mean(axis=0)
    column_std = scores.std(axis=0)

    # np.unique yields ascending distinct values; reverse for highest first.
    distinct_desc = np.unique(row_totals)[::-1]
    totals_histogram = Counter(row_totals)

    stats = (column_mean, column_std, distinct_desc)
    return stats, totals_histogram
def check_grade(name, number, course: str):
    """Look up one student's scores and plot the course's score distribution.

    The course CSV is loaded from the ``IgnoreLee/Grade_Dataset`` dataset; its
    first two columns become the row index (matched against ``number`` and
    ``name`` respectively) and the remaining columns are treated as scores,
    with missing values counted as 0.

    Args:
        name: Student name, compared with the second index column.
        number: Student number (anything ``int()`` accepts), compared with the
            first index column.
        course: Course name; must be one of ``CLASS_LST``.

    Returns:
        ``(img, finded_student)``: the image produced by ``draw_figure`` and a
        DataFrame with the matching row(s) plus a "합계" (total) column.

    Raises:
        gr.Error: If ``course`` is not in ``CLASS_LST``, ``number`` is not an
            integer, or no row matches the given number and name.
    """
    # Explicit raise instead of `assert`: asserts are stripped under -O and
    # would raise AssertionError rather than the gr.Error itself.
    if course not in CLASS_LST:
        raise gr.Error("μˆ˜μ—… 이름을 ν™•μΈν•΄μ£Όμ„Έμš”.")

    # Preprocessing
    try:
        number = int(number)
    except (TypeError, ValueError) as err:
        raise gr.Error("Student number must be an integer.") from err

    dataset = load_dataset("IgnoreLee/Grade_Dataset",
                           data_files=f"{course}.csv",
                           use_auth_token=access_token)
    logging.debug("Debug message: %s", dataset)

    data = dataset['train'].to_pandas()
    logging.debug("Debug message: %s", data)
    # Index rows by the first two columns; the rest are score columns.
    data = data.set_index(list(data.columns[:2]))
    logging.debug("Debug message: %s", data)
    data = data.fillna(0)
    cols = list(data.columns)

    # Data Analysis
    (mean, _std, _ranking), count_dict = csv_analysis(data)
    logging.debug("Debug message: %s", count_dict)

    # Select the student with an explicit mask over both index levels, so a
    # missing student produces a clear error instead of an indexing exception.
    row_index = data.index
    is_match = ((row_index.get_level_values(0) == number)
                & (row_index.get_level_values(1) == name))
    if not is_match.any():
        raise gr.Error(
            "No matching student found. Please check the name and student number."
        )
    # Work on a copy so adding the total column never writes into `data`.
    finded_student = data.loc[is_match].copy()
    finded_student["합계"] = finded_student[cols].values.sum(1)

    # Median of every student's total (the previous code took the median of
    # the single selected student's total).
    median = np.median(data[cols].values.sum(1))

    # Draw Figure
    img = draw_figure(count_dict, mean, median)
    return img, finded_student