Spaces:
Sleeping
Sleeping
| import json | |
| import pandas as pd | |
| import plotly.express as px | |
# ISO 639-1 style two-letter codes that the language detection model can
# predict, kept in alphabetical order.
LANG_CODES = [
    'ar', 'bg', 'de', 'el', 'en',
    'es', 'fr', 'hi', 'it', 'ja',
    'nl', 'pl', 'pt', 'ru', 'sw',
    'th', 'tr', 'ur', 'vi', 'zh',
]
# Maps a country name (as it appears in the GeoJSON feature properties) to the
# language code used to colour that country on the language map.
# Each country appears exactly once; a country is attributed to one language.
COUNTRY_TO_LANG_CODE = {
    # Arabic
    'Algeria': 'ar',
    'Chad': 'ar',
    'Djibouti': 'ar',
    'Egypt': 'ar',
    'Iraq': 'ar',
    'Jordan': 'ar',
    'Kuwait': 'ar',
    'Lebanon': 'ar',
    'Libya': 'ar',
    'Mali': 'ar',
    'Mauritania': 'ar',
    'Morocco': 'ar',
    'Oman': 'ar',
    'Palestine': 'ar',
    'Qatar': 'ar',
    'Saudi Arabia': 'ar',
    'Somalia': 'ar',
    'Sudan': 'ar',
    'Syria': 'ar',
    'Tunisia': 'ar',
    'United Arab Emirates': 'ar',
    'Yemen': 'ar',
    # Bulgarian
    'Bulgaria': 'bg',
    # German
    'Germany': 'de',
    # Greek
    'Greece': 'el',
    'Cyprus': 'el',
    # English
    'United States of America': 'en',
    'Ireland': 'en',
    'United Kingdom': 'en',
    'Canada': 'en',
    'Australia': 'en',
    # Spanish
    'Mexico': 'es',
    'Colombia': 'es',
    'Spain': 'es',
    'Argentina': 'es',
    'Peru': 'es',
    'Venezuela': 'es',
    'Chile': 'es',
    'Guatemala': 'es',
    'Ecuador': 'es',
    'Bolivia': 'es',
    'Cuba': 'es',
    'Dominican Rep.': 'es',
    'Honduras': 'es',
    'Paraguay': 'es',
    'El Salvador': 'es',
    'Nicaragua': 'es',
    'Costa Rica': 'es',
    'Panama': 'es',
    'Uruguay': 'es',
    # Equatorial Guinea is the Spanish-speaking country (Guinea is
    # French-speaking). Both the full and the abbreviated spelling are listed
    # because the exact name used by the GeoJSON file is not visible here.
    'Equatorial Guinea': 'es',
    'Eq. Guinea': 'es',
    # French
    'France': 'fr',
    # Hindi
    'India': 'hi',
    # Italian
    'Italy': 'it',
    # Japanese
    'Japan': 'ja',
    # Dutch
    'Netherlands': 'nl',
    'Belgium': 'nl',
    # Polish
    'Poland': 'pl',
    # Portuguese
    'Portugal': 'pt',
    # Russian
    'Russia': 'ru',
    # Swahili
    'Uganda': 'sw',
    'Kenya': 'sw',
    'Tanzania': 'sw',
    # Thai
    'Thailand': 'th',
    # Turkish
    'Turkey': 'tr',
    # Urdu
    'Pakistan': 'ur',
    # Vietnamese
    'Vietnam': 'vi',
    # Chinese
    'China': 'zh',
}
def lang_map(df):
    """Build a world choropleth of predicted-language counts per country.

    Each country found in ``data/countries.geo.json`` that has an entry in
    ``COUNTRY_TO_LANG_CODE`` is coloured by how many rows of ``df`` were
    predicted as that country's language. Countries whose language does not
    occur in ``df`` are left out of the plotted data.

    Args:
        df: pandas DataFrame with a ``predicted_language`` column holding
            language codes.

    Returns:
        The plotly figure produced by ``px.choropleth``.

    Raises:
        FileNotFoundError: if ``data/countries.geo.json`` does not exist.
        KeyError: if ``df`` has no ``predicted_language`` column.
    """
    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open('data/countries.geo.json', encoding='utf-8') as f:
        countries = json.load(f)

    # Series indexed by language code -> number of rows with that prediction.
    lang_counts = df.value_counts('predicted_language')

    # One (country, count, lang_code) row per mapped country whose language
    # was actually predicted at least once, in GeoJSON feature order.
    rows = []
    for feature in countries['features']:
        country = feature['properties']['name']
        lang_code = COUNTRY_TO_LANG_CODE.get(country)
        if lang_code is not None and lang_code in lang_counts.index:
            rows.append((country, lang_counts.loc[lang_code], lang_code))

    lang_df = pd.DataFrame(rows, columns=['country', 'count', 'lang_code'])

    # NOTE(review): plotly documents `geojson` as taking effect with
    # locationmode 'geojson-id'; with 'country names' the built-in country
    # geometry is presumably used instead - confirm this is intended.
    fig = px.choropleth(
        lang_df,
        geojson=countries,
        locations='country',
        locationmode='country names',
        color='count',
        color_continuous_scale=[
            [0, "rgb(45,45,48)"],
            [0.33, "rgb(116,173,209)"],
            [0.66, "rgb(255,255,0)"],
            [1, "rgb(255,94,5)"]
        ],
        scope='world',
        hover_data=['lang_code'],
        labels={'count': "Language Count"},
        template='plotly_dark'
    )
    fig.update_geos(showcountries=True)
    fig.update_layout(
        title_text="Language Map",
        margin={"r": 0, "t": 20, "l": 0, "b": 0}
    )
    return fig