File size: 3,249 Bytes
b20898d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import os
import numpy as np
import pandas as pd
import gradio as gr
import matplotlib as mpl
import matplotlib.pyplot as plt
from collections import Counter 
from urllib.parse import quote
from huggingface_hub import  hf_hub_download
from datasets import load_dataset
from datasets import Dataset
from huggingface_hub import HfApi, HfFolder
import logging

# NOTE: at DEBUG level check_grade() logs the full grade table it loads.
# Raise the level if those logs may be visible to anyone but the maintainer.
logging.basicConfig(level=logging.DEBUG)


space_name = 'Grade_Dataset'
# Secrets must never be committed to source control. The token is read from
# the environment instead (HF_TOKEN is the variable the Hugging Face tooling
# itself looks for). Any token that was previously hard-coded here should be
# treated as leaked and revoked.
access_token = os.environ.get("HF_TOKEN")

DATA_PATH = "https://huggingface.co/datasets/IgnoreLee/Grade_Dataset/tree/main"
# This is a URL, not a filesystem path: join with "/" explicitly so the result
# does not depend on the host OS path separator.
IMG_PATH = f"{DATA_PATH}/img"
# Course names accepted by check_grade(); each one is also the stem of the
# CSV file loaded for that course.
CLASS_LST = ["지능형시스템(가)", "지능형시스템(나)"]

def _get_csv_file(name, repo_id="IgnoreLee/Grade_Dataset"):
    """Download ``<name>.csv`` from a Hugging Face dataset repository.

    Args:
        name: File stem; ``.csv`` is appended to form the file name requested
            from the repository.
        repo_id: Dataset repository to download from.

    Returns:
        Whatever ``hf_hub_download`` returns for the requested file.
    """
    # Use the module-level token rather than embedding a second copy of the
    # secret in this function.
    return hf_hub_download(
        repo_id=repo_id,
        filename=f"{name}.csv",
        repo_type="dataset",
        token=access_token,
    )

def encoder(strings):
    """Percent-encode text while leaving a fixed set of punctuation untouched.

    Each item obtained by iterating ``strings`` (each character, when a plain
    ``str`` is passed) is copied verbatim if it is one of
    ``( ) _ - . , ~ / ; \\`` or a space; every other item is passed through
    ``urllib.parse.quote``. ASCII letters and digits come out unchanged
    because ``quote`` never escapes them.

    Args:
        strings: A string, or any iterable of strings, to encode.

    Returns:
        The concatenation of the encoded pieces as a single ``str``.
    """
    passthrough = frozenset("()_-.,~/;\\ ")
    return "".join(
        piece if piece in passthrough else quote(piece) for piece in strings
    )

def draw_figure(count_dict: dict, mean, median) -> np.ndarray:
    """Render the overall score distribution as an image array.

    Draws a bar chart of ``count_dict`` (bar position = key, bar height =
    value) and a dotted vertical line at ``mean.sum()``.

    Args:
        count_dict: Mapping whose keys are bar positions and whose values are
            the bar heights (counts).
        mean: Object exposing ``.sum()``; the sum of its entries is drawn as
            the "Mean" line.
        median: Accepted for interface compatibility; it is not drawn.

    Returns:
        A NumPy array copied from the canvas's RGBA buffer after rendering.
        This relies on an Agg-based canvas, as the previous implementation did.
    """
    positions = list(count_dict.keys())
    heights = list(count_dict.values())
    overall_mean = mean.sum()

    fig = plt.figure()
    try:
        ax = fig.add_subplot(111)
        ax.set_title("Overall score distribution")
        ax.bar(
            positions,
            heights,
            width=0.4,
            label=f"Total number of people {np.sum(heights)}",
        )
        ax.axvline(
            overall_mean,
            label=f"Mean : ({overall_mean:.2f})",
            color="r",
            linestyle=":",
        )
        ax.legend()
        # Axis labels must be set by *calling* the setters. Assigning to
        # plt.xlabel / plt.ylabel replaces the pyplot functions with strings,
        # leaves the axes unlabeled, and breaks later callers of pyplot.
        ax.set_xlabel("Grade")
        ax.set_ylabel("Number of Person")
        fig.canvas.draw()
        # Copy the pixels out through the public API before the figure is
        # closed, instead of reaching into the private renderer object.
        return np.array(fig.canvas.buffer_rgba())
    finally:
        # pyplot keeps every figure alive until it is closed; without this
        # each call would leak one figure.
        plt.close(fig)

def csv_analysis(data: pd.DataFrame):
    """Summarise a table of numeric scores.

    Args:
        data: DataFrame whose values are all numeric; each row is summed to
            give that row's total.

    Returns:
        A pair ``((mean, std, ranking), count_dict)`` where ``mean`` and
        ``std`` are the per-column mean and standard deviation, ``ranking``
        holds the distinct row totals in descending order, and ``count_dict``
        is a ``Counter`` mapping each row total to how many rows have it.
    """
    matrix = data.values
    row_totals = matrix.sum(axis=1)

    column_mean = matrix.mean(axis=0)
    column_std = matrix.std(axis=0)
    # np.unique already returns its result sorted ascending; reverse it.
    distinct_totals_desc = np.unique(row_totals)[::-1]

    summary = (column_mean, column_std, distinct_totals_desc)
    return summary, Counter(row_totals)

def check_grade(name, number, course: str):
    """Look up one student's scores and render the class score distribution.

    Loads ``<course>.csv`` from the ``IgnoreLee/Grade_Dataset`` dataset, uses
    its first two columns as a (number, name) index, treats missing scores as
    0, and selects the rows matching ``number`` and ``name``.

    Args:
        name: Value matched against the second index column.
        number: Value matched against the first index column; converted with
            ``int()`` before the lookup.
        course: Course name; must be one of ``CLASS_LST``.

    Returns:
        A pair ``(img, finded_student)``: the image returned by
        ``draw_figure`` for the whole class, and the matching rows with an
        extra "합계" column holding the sum of the score columns.

    Raises:
        gr.Error: If ``course`` is unknown, ``number`` is not an integer, or
            no row matches ``number`` and ``name``.
    """
    # Validate with an explicit raise: `assert` is stripped under `python -O`
    # and would raise AssertionError instead of the user-visible gr.Error.
    if course not in CLASS_LST:
        raise gr.Error("수업 이름을 확인해주세요.")

    # Preprocessing
    try:
        number = int(number)
    except (TypeError, ValueError) as err:
        raise gr.Error("번호를 숫자로 입력해주세요.") from err

    dataset = load_dataset(
        "IgnoreLee/Grade_Dataset",
        data_files=f"{course}.csv",
        use_auth_token=access_token,
    )
    logging.debug("Debug message: %s", dataset)
    data = dataset['train'].to_pandas()
    logging.debug("Debug message: %s", data)
    # The first two columns identify a student; everything else is a score.
    data.set_index(list(data.columns[:2]), inplace=True)
    logging.debug("Debug message: %s", data)
    data.fillna(0, inplace=True)
    cols = list(data.columns)

    # Data Analysis
    (mean, std, ranking), count_dict = csv_analysis(data)
    logging.debug("Debug message: %s", count_dict)
    # Median of every student's total, consistent with the class-wide mean
    # and counts handed to draw_figure.
    median = np.median(data.values.sum(1))

    try:
        # Copy so that adding the total column below neither writes into nor
        # warns about a slice of the full table.
        finded_student = data.loc[number, name, :].copy()
    except KeyError as err:
        raise gr.Error(
            "일치하는 학생을 찾을 수 없습니다. 이름과 번호를 확인해주세요."
        ) from err
    finded_student.loc[:, "합계"] = finded_student.loc[:, cols].values.sum(1)

    # Draw Figure
    img = draw_figure(count_dict, mean, median)

    return img, finded_student