Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| from collections import defaultdict | |
# Load the language/extension statistics table once, at import time.
df = pd.read_csv("./stackv2_languages_freq.csv")
# Rows without an extension get an explicit placeholder label. The result is
# assigned back instead of calling fillna(..., inplace=True) on the column
# selection: that chained in-place form is deprecated and stops modifying
# `df` once pandas Copy-on-Write is active.
df["extension"] = df["extension"].fillna("[no ext]")
# Alphabetical list of languages (dropdown choices) and the distinct extensions.
langs = sorted(df["language"].unique())
exts = list(df["extension"].unique())
def compute(lang, data=None):
    """Build the Markdown report shown for one language.

    The report has a summary line (the language's ``lang_fraction`` as a
    percentage and the number of distinct extensions), one entry per
    extension with its ``ext_fraction_per_lang`` percentage, and one fenced
    example (the ``content`` column) per remaining row.

    Args:
        lang: Value to match against the ``language`` column.
        data: Optional DataFrame to report on. It must provide the columns
            ``language``, ``extension``, ``lang_fraction``,
            ``ext_fraction_per_lang``, ``generated_fraction``,
            ``vendor_fraction`` and ``content``. Defaults to the
            module-level ``df``.

    Returns:
        The report as a Markdown string. If no row matches ``lang`` after
        filtering, a short "no data" message is returned instead.
    """
    table = df if data is None else data
    rows = table[table["language"] == lang]
    # clean up weird exts: keep only rows whose per-language share is above 0.0001
    rows = rows[rows["ext_fraction_per_lang"] > 0.0001].reset_index()
    if rows.empty:
        # Unknown or cleared selection: avoid the IndexError from .iloc[0] below.
        return f"## Summary:\n\n No data available for the `{lang}` language."

    # One row per extension (first non-null value of each column), sorted by extension.
    per_ext = rows.groupby("extension").first().reset_index()

    # Computed outside the f-string: nesting the same quote type inside an
    # f-string is a syntax error before Python 3.12.
    lang_percent = rows["lang_fraction"].iloc[0] * 100
    header = (
        f"## Summary:\n\n The `{lang}` language ({lang_percent:.4f}% of all) "
        f"has {per_ext.shape[0]} extensions: \n\n"
    )

    ext_entries = []
    for ext, ext_fraction, gen_fraction, vend_fraction in zip(
        per_ext["extension"],
        per_ext["ext_fraction_per_lang"],
        per_ext["generated_fraction"],
        per_ext["vendor_fraction"],
    ):
        # Every displayed percentage is capped at 100%.
        parts = [f"{min(ext_fraction, 1)*100:.2f}%"]
        # Generated / vendor shares are only mentioned when above 20%.
        if gen_fraction > 0.2:
            parts.append(f"autogenerated: {min(gen_fraction, 1)*100:.2f}%")
        if vend_fraction > 0.2:
            parts.append(f"vendor files: {min(vend_fraction, 1)*100:.2f}%")
        ext_entries.append(f"`{ext}` ({', '.join(parts)})")

    # Joining puts the ", " separator only between entries, so the last
    # entry no longer ends with a dangling comma.
    listing = ", \n\n".join(ext_entries) + "\n\n\n\n"

    # One fenced example per remaining row (not per unique extension).
    examples = "".join(
        f"**Example {i+1} (extension=`{ext}`):**\n\n ```\n{example}\n```\n\n"
        for i, (ext, example) in enumerate(zip(rows["extension"], rows["content"]))
    )
    return (header + listing + examples).strip()
# Page header with the overall language / extension counts.
_header_md = f"# Programming Languages for The Stack v2\n\nIn total there are **{len(langs)} languages** and **{len(exts)} extensions.**"

with gr.Blocks() as demo:
    gr.Markdown(_header_md)
    # Language picker, preselected to "Python", and the Markdown pane that
    # displays the report returned by compute().
    lang_select = gr.Dropdown(choices=langs, value="Python")
    md = gr.Markdown("")
    # The same handler wiring is used both when the selection changes and
    # when the page first loads.
    _refresh = dict(fn=compute, inputs=[lang_select], outputs=[md])
    lang_select.change(**_refresh)
    demo.load(**_refresh)

demo.launch()