"""Grade lookup helpers.

Loads a course score sheet from the ``IgnoreLee/Grade_Dataset`` dataset on the
Hugging Face Hub, computes summary statistics over it, and renders the
distribution of total scores as an image.
"""

import logging
import os
from collections import Counter
from urllib.parse import quote

import gradio as gr
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from datasets import Dataset
from datasets import load_dataset
from huggingface_hub import HfApi, HfFolder
from huggingface_hub import hf_hub_download

logging.basicConfig(level=logging.DEBUG)

space_name = 'Grade_Dataset'
# SECURITY NOTE(review): this credential is committed in plain text. It is
# kept here only so existing behavior does not change; it should be revoked
# and supplied through a secret / environment variable instead.
access_token = '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'
DATA_PATH = "https://huggingface.co/datasets/IgnoreLee/Grade_Dataset/tree/main"
# DATA_PATH is a URL, so join with "/" explicitly; os.path.join would insert
# a backslash on Windows.
IMG_PATH = f"{DATA_PATH}/img"
CLASS_LST = ["지능형시스템(가)", "지능형시스템(나)"]

_REPO_ID = "IgnoreLee/Grade_Dataset"

# Characters that `encoder` copies through without percent-encoding.
_PASSTHROUGH_CHARS = frozenset("()_-.,~/;\\ ")


def _get_csv_file(name, repo_id="IgnoreLee/Grade_Dataset"):
    """Download ``<name>.csv`` from a Hub dataset repo.

    Args:
        name: File name without the ``.csv`` extension.
        repo_id: Dataset repository to download from.

    Returns:
        Whatever ``hf_hub_download`` returns for the requested file.
    """
    return hf_hub_download(
        repo_id=repo_id,
        filename=f"{name}.csv",
        repo_type="dataset",
        token=access_token,
    )


def encoder(strings):
    """Percent-encode *strings*, leaving a fixed set of characters untouched.

    Each item of *strings* (each character, when it is a ``str``) is copied
    as-is if it is one of ``( ) _ - . , ~ / ; \\`` or a space; every other
    item goes through ``urllib.parse.quote``. Digits need no special casing
    because ``quote`` never escapes them.

    Args:
        strings: Text to encode.

    Returns:
        The encoded text as a single ``str``.
    """
    return "".join(
        char if char in _PASSTHROUGH_CHARS else quote(char) for char in strings
    )


def draw_figure(count_dict: dict, mean, median) -> np.ndarray:
    """Render the total-score distribution as an image array.

    Args:
        count_dict: Mapping of total score -> number of students with it.
        mean: Array of per-column means; its sum is drawn as a vertical line.
        median: Accepted for interface compatibility; it is not plotted.

    Returns:
        A copy of the canvas RGBA buffer as a NumPy array.
    """
    fig, ax = plt.subplots()
    try:
        ax.set_title("Overall score distribution")
        ax.bar(
            list(count_dict.keys()),
            list(count_dict.values()),
            width=0.4,
            label=f"Total number of people {sum(count_dict.values())}",
        )
        mean_total = mean.sum()
        ax.axvline(
            mean_total,
            label=f"Mean : ({mean_total:.2f})",
            color="r",
            linestyle=":",
        )
        ax.legend()
        # The labels must be set by calling the setters; assigning to
        # plt.xlabel / plt.ylabel only overwrites the pyplot functions.
        ax.set_xlabel("Grade")
        ax.set_ylabel("Number of Person")
        fig.canvas.draw()
        # np.array copies the buffer, so the result stays valid once the
        # figure is closed below.
        return np.array(fig.canvas.buffer_rgba())
    finally:
        # Release the figure; pyplot otherwise keeps every figure alive.
        plt.close(fig)


def csv_analysis(data: pd.DataFrame):
    """Compute summary statistics over a score table.

    Args:
        data: Frame whose values are all numeric scores (one row per entry).

    Returns:
        ``((mean, std, ranking), count_dict)`` where ``mean`` and ``std`` are
        per-column statistics, ``ranking`` is the distinct row totals in
        descending order, and ``count_dict`` is a ``Counter`` of row totals.
    """
    scores = data.values
    totals = scores.sum(1)
    mean, std = scores.mean(0), scores.std(0)
    ranking = np.sort(np.unique(totals))[::-1]
    count_dict = Counter(totals)
    return (mean, std, ranking), count_dict


def check_grade(name, number, course: str):
    """Look up one student's scores and draw the course score distribution.

    Args:
        name: Second index key of the row to look up.
        number: First index key of the row to look up; converted with ``int``.
        course: Course name; must be one of ``CLASS_LST``.

    Returns:
        ``(img, finded_student)``: the distribution image from ``draw_figure``
        and the matching row(s) with an added "합계" (total) column.

    Raises:
        gr.Error: If *course* is not a known course name.
    """
    # Raise the error itself; `assert cond, gr.Error(...)` only produced an
    # AssertionError and is skipped entirely under `python -O`.
    if course not in CLASS_LST:
        raise gr.Error("수업 이름을 확인해주세요.")

    # Preprocessing
    number = int(number)
    # NOTE(review): `use_auth_token` may be deprecated in favor of `token` in
    # newer `datasets` releases - confirm against the pinned version.
    dataset = load_dataset(
        _REPO_ID,
        data_files=f"{course}.csv",
        use_auth_token=access_token,
    )
    logging.debug("Debug message: %s", dataset)
    data = dataset['train'].to_pandas()
    logging.debug("Debug message: %s", data)
    # The first two columns become the (number, name) lookup index.
    data.set_index(list(data.columns[:2]), inplace=True)
    logging.debug("Debug message: %s", data)
    data.fillna(0, inplace=True)
    cols = list(data.columns)

    # Data Analysis
    (mean, std, ranking), count_dict = csv_analysis(data)
    logging.debug("Debug message: %s", count_dict)

    # Work on an explicit copy so adding the total column never writes into
    # (or warns about) a slice of `data`.
    finded_student = data.loc[number, name, :].copy()
    finded_student.loc[:, "합계"] = finded_student.loc[:, cols].values.sum(1)
    # Median over the matched rows' totals; draw_figure does not plot it.
    median = np.median(finded_student[cols].values.sum(1))

    # Draw Figure
    img = draw_figure(count_dict, mean, median)
    return img, finded_student