| import numpy as np | |
| import pandas as pd | |
| from languages import en, ru | |
# Column names of the incoming results table. The functions in this module
# index the DataFrame with these keys, and process_keys() renames them to
# localized headers at the end of the pipeline.
category_init: str = "label"  # column holding the category identifier
certainty_init: str = "score"  # column holding the numeric certainty
def filter_results(results: pd.DataFrame, top_k=0.95):
    """Keep the leading rows whose cumulative certainty first reaches ``top_k``.

    Rows are consumed in their existing order, so the caller is expected to
    have sorted them already (``process_results`` sorts by descending
    certainty before calling this). The row at which the running total first
    reaches ``top_k`` is included. Rows with a certainty below 0.01 are then
    dropped from the kept prefix.

    Args:
        results: Frame with a numeric ``certainty_init`` column.
        top_k: Cumulative-certainty threshold (a probability mass, not a
            row count, despite the name).

    Returns:
        The filtered frame. If the running total never reaches ``top_k``
        (including for an empty frame), no prefix cut is applied and only
        the 0.01 floor filters rows.
    """
    certs = results[certainty_init].to_numpy()
    below_threshold = certs.cumsum() < top_k

    if below_threshold.all():
        # The threshold is never reached. np.argmin would return 0 for an
        # all-True mask (and raise on an empty one), which would wrongly
        # keep just the first row, so keep every row instead.
        keep_count = len(results)
    else:
        # argmin yields the first False, i.e. the first row at which the
        # running total is no longer below the threshold; +1 includes it.
        keep_count = int(np.argmin(below_threshold)) + 1

    results = results.iloc[:keep_count]
    # Hide negligible entries (below 1%) even inside the kept prefix.
    results = results[results[certainty_init] >= 0.01]
    return results
def process_keys(results: pd.DataFrame, lang):
    """Rename the category and certainty columns to localized headers.

    Headers for a language without a translation fall back to English.

    Args:
        results: Frame whose columns include ``category_init`` and
            ``certainty_init``.
        lang: Language key; compared against ``en`` and ``ru``.

    Returns:
        A renamed frame as produced by ``DataFrame.rename``.
    """
    category_headers = {en: "Category", ru: "Категория"}
    certainty_headers = {en: "Certainty", ru: "Уверенность"}

    category_header = category_headers.get(lang, category_headers[en])
    certainty_header = certainty_headers.get(lang, certainty_headers[en])

    column_map = {
        category_init: category_header,
        certainty_init: certainty_header,
    }
    return results.rename(columns=column_map)
def process_categories(results, lang):
    """Replace category codes with human-readable, localized names.

    Each value in the ``category_init`` column is cut at its first ``"."``
    (so ``"cs.AI"`` is looked up as ``"cs"``) and then translated. Codes
    missing from the table are left as the cut code. A language with no
    table of its own uses the English names, matching the English fallback
    that ``process_keys`` applies to the column headers.

    Args:
        results: Frame with a ``category_init`` column of string codes.
        lang: Language key; compared against ``en`` and ``ru``.

    Returns:
        The same ``results`` object; its ``category_init`` column is
        reassigned in place.
    """
    categories = {
        en: {
            "math": "Math",
            "astro-ph": "Astrophysics",
            "cond-mat": "Condensed matter physics",
            "hep-ph": "High energy physics -- Phenomenology",
            "physics": "Physics",
            "hep-th": "High energy physics -- Theory",
            "cs": "Computer Science",
            "quant-ph": "Quantum physics",
            "gr-qc": "General Relativity and Quantum Cosmology",
            "math-ph": "Mathematical Physics",
            "nucl-th": "Nuclear Theory",
            "eess": "Electrical Engineering and Systems Science",
            "q-bio": "Quantitative Biology",
            "nlin": "Nonlinear Sciences",
            "stat": "Statistics",
            "hep-lat": "High Energy Physics - Lattice",
            "hep-ex": "High Energy Physics - Experiment",
            "nucl-ex": "Nuclear Experiment",
            "econ": "Economics",
            "q-alg": "Quantum Algebra",
            "q-fin": "Quantitative Finance",
            "alg-geom": "Algebraic Geometry",
            "supr-con": "Superconductivity",
            "chao-dyn": "Chaotic dynamics",
            "dg-ga": "Differential Geometry",
            "funct-an": "Functional analysis",
            "atom-ph": "Atomic physics",
            "chem-ph": "Chemical Physics",
            "ao-sci": "Atmospheric and Oceanic Physics",
            "acc-phys": "Accelerator Physics",
            "bayes-an": "Bayesian statistics",
            "plasm-ph": "Plasma Physics",
        },
        ru: {
            "math": "Математика",
            "astro-ph": "Астрофизика",
            "cond-mat": "Физика конденсированного состояния",
            "hep-ph": "Физика элементарных частиц -- Феноменология",
            "physics": "Физика",
            "hep-th": "Физика элементарных частиц -- Теория",
            "cs": "Компьютерные науки",
            "quant-ph": "Квантовая физика",
            "gr-qc": "Общая теория относительности и квантовая космология",
            "math-ph": "Математическая физика",
            "nucl-th": "Ядерная физика",
            "eess": "Электротехника и системоведение",
            "q-bio": "Количественная биология",
            "nlin": "Нелинейные науки",
            "stat": "Статистика",
            "hep-lat": "Физика элементарных частиц -- Решетки",
            "hep-ex": "Экспериментальная физика элементарных частиц",
            "nucl-ex": "Ядерный эксперимент",
            "econ": "Экономика",
            "q-alg": "Квантовая алгебра",
            "q-fin": "Количественные финансы",
            "alg-geom": "Алгебраическая геометрия",
            "supr-con": "Сверхпроводимость",
            "chao-dyn": "Теория хаоса",
            "dg-ga": "Дифференциальная геометрия",
            "funct-an": "Функциональный анализ",
            "atom-ph": "Атомная физика",
            "chem-ph": "Химическая физика",
            "ao-sci": "Физика атмосферы и океана",
            "acc-phys": "Физика ускорителей",
            "bayes-an": "Байесовская статистика",
            "plasm-ph": "Физика плазмы",
        },
    }

    # Resolve the translation table once. Unknown languages use English so
    # the values agree with the English headers process_keys falls back to.
    names = categories.get(lang, categories[en])

    def process_category(category):
        # Keep only the part before the first "."; partition returns the
        # whole string unchanged when there is no ".".
        archive = category.partition(".")[0]
        return names.get(archive, archive)

    results[category_init] = results[category_init].apply(process_category)
    return results
def process_certainities(results):
    """Turn each certainty value into a percentage string with two decimals.

    A value of ``0.1234`` becomes ``"12.34%"``.

    Args:
        results: Frame with a numeric ``certainty_init`` column.

    Returns:
        The same ``results`` object; its ``certainty_init`` column is
        reassigned in place with the formatted strings.
    """

    def as_percent(certainty):
        return f"{100 * certainty:0.2f}%"

    formatted = results[certainty_init].apply(as_percent)
    results[certainty_init] = formatted
    return results
def process_results(results, lang):
    """Run the full post-processing pipeline on raw results.

    Steps, in order: build a DataFrame, translate category codes, sum rows
    that share a category, sort by descending certainty, trim with
    ``filter_results``, format certainties as percentages, and rename the
    columns to localized headers.

    Args:
        results: Any input accepted by the ``pd.DataFrame`` constructor that
            yields ``category_init`` and ``certainty_init`` columns.
        lang: Language key passed to the localization steps.

    Returns:
        The processed DataFrame.
    """
    table = process_categories(pd.DataFrame(results), lang)
    # Distinct codes can map to one display name, so merge them by summing.
    merged = table.groupby(by=category_init, as_index=False).sum()
    ranked = merged.sort_values(by=[certainty_init], ascending=False)
    shortlisted = filter_results(ranked)
    formatted = process_certainities(shortlisted)
    return process_keys(formatted, lang)