Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,17 +7,24 @@ df = pd.read_csv("./stackv2_languages_freq.csv")
|
|
| 7 |
langs = sorted(list(df["language"].unique()))
|
| 8 |
exts = list(df["extension"].unique())
|
| 9 |
|
| 10 |
-
|
| 11 |
-
lang_to_ext = defaultdict(set)
|
| 12 |
-
|
| 13 |
-
for lang, ext in zip(df["language"], df["extension"]):
|
| 14 |
-
if isinstance(ext, str):
|
| 15 |
-
lang_to_ext[lang].update([ext])
|
| 16 |
-
|
| 17 |
def compute(lang):
|
| 18 |
-
|
| 19 |
-
report = f"## Summary:\n\n The `{lang}` language has {len(lang_to_ext[lang])} extensions: {', '.join(sorted(['`'+ext+'`' for ext in lang_to_ext[lang]]))}.\n\n"
|
| 20 |
df_lang = df[df["language"]==lang]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
for i, (ext, example) in enumerate(zip(df_lang["extension"], df_lang["content"])):
|
| 23 |
example_string = f"**Example {i+1} (extension=`{ext}`):**\n\n ```\n{example}\n```\n\n"
|
|
|
|
| 7 |
langs = sorted(list(df["language"].unique()))
|
| 8 |
exts = list(df["extension"].unique())
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
def compute(lang):
|
|
|
|
|
|
|
| 11 |
df_lang = df[df["language"]==lang]
|
| 12 |
+
# clean up weird exts
|
| 13 |
+
df_lang = df_lang[df_lang["ext_fraction_per_lang"] > 0.0001].reset_index()
|
| 14 |
+
|
| 15 |
+
df_lang_uniq = df_lang.groupby("extension").first().reset_index()
|
| 16 |
+
|
| 17 |
+
report = f"## Summary:\n\n The `{lang}` language has {df_lang_uniq.shape[0]} extensions: \n\n"
|
| 18 |
+
|
| 19 |
+
for i, (ext, ext_fraction, gen_fraction, vend_fraction) in enumerate(zip(df_lang_uniq["extension"], df_lang_uniq["ext_fraction_per_lang"], df_lang_uniq["generated_fraction"], df_lang_uniq["vendor_fraction"])):
|
| 20 |
+
fractions_string = f"{ext_fraction*100:.2f}%"
|
| 21 |
+
if gen_fraction > 0.5:
|
| 22 |
+
fractions_string += f", autogenerated: {gen_fraction*100:.2f}%"
|
| 23 |
+
if vend_fraction > 0.5:
|
| 24 |
+
fractions_string += f", vendor files: {vend_fraction*100:.2f}%"
|
| 25 |
+
|
| 26 |
+
report += f"`{ext}` ({fractions_string}), \n\n"
|
| 27 |
+
report = report[:-2] + "\n\n\n\n"
|
| 28 |
|
| 29 |
for i, (ext, example) in enumerate(zip(df_lang["extension"], df_lang["content"])):
|
| 30 |
example_string = f"**Example {i+1} (extension=`{ext}`):**\n\n ```\n{example}\n```\n\n"
|