Spaces:
Sleeping
Sleeping
| import json | |
| import pickle | |
| import gradio as gr | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
def plot_scalar_on_scale(scalar_value, distance_type):
    """Render a distance as a marker on a horizontal "Close"-to-"Distant" colour bar.

    Args:
        scalar_value: The distance to display. It is clipped to the range
            [0.0, 1.0] before plotting.
        distance_type: Text used as the title of the plot.

    Returns:
        The matplotlib figure containing the rendered scale.
    """
    # The figure is built without pyplot on purpose: pyplot keeps a global
    # reference to every figure it creates, which leaks memory in a
    # long-running server unless each figure is closed explicitly.
    from matplotlib.figure import Figure

    # Ensure the scalar is within bounds
    scalar_value = np.clip(scalar_value, 0.0, 1.0)

    fig = Figure(figsize=(8, 2))
    ax = fig.subplots()

    # Horizontal gradient (from close to distant) used as the background
    gradient = np.linspace(0, 1, 256).reshape(1, -1)
    ax.imshow(gradient, extent=[0, 1, 0, 1], aspect='auto', cmap='viridis_r')

    # Mark the value with a vertical line and a large dot, and print the
    # value (two decimals) inside the dot
    ax.axvline(x=scalar_value, color='white', lw=5)
    ax.plot(scalar_value, 0.5, 'o', color='white', markersize=42)
    ax.text(scalar_value, 0.5, f'{scalar_value:.2f}', color='black', ha='center', va='center', fontsize=14)

    # Labels rotated 90 degrees on the sides
    ax.text(-0.03, 0.5, 'Close', ha='center', va='center', fontsize=14, rotation=90)
    ax.text(1.03, 0.5, 'Distant', ha='center', va='center', fontsize=14, rotation=270)

    # No ticks, fixed unit-square limits
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_title(distance_type)

    # Remove spines for a cleaner look
    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_visible(False)

    return fig
def load_json_from_path(path):
    """Read the UTF-8 encoded JSON file at ``path`` and return the decoded object."""
    with open(path, "r", encoding="utf8") as json_file:
        return json.load(json_file)
class Measurer:
    """Looks up four pairwise language distances and renders each one as a plot.

    Each distance table is a nested dict indexed as ``table[lang_a][lang_b]``.
    Lookups try ``table[l1][l2]`` first and fall back to ``table[l2][l1]``,
    so a pair only has to be stored in one orientation.
    """

    def __init__(self):
        """Load all distance tables from files in the current working directory.

        The map distances are rescaled so that the largest one becomes 1.0.
        The ASP similarities are converted to distances (``1 - similarity``)
        and stored for each unordered pair once.
        """
        # learned dist
        self.learned_dist_func = load_json_from_path("lang_1_to_lang_2_to_l1_dist.json")
        # tree dist
        self.tree_dist_func = load_json_from_path("lang_1_to_lang_2_to_tree_dist.json")
        # map dist
        self.map_dist_func = load_json_from_path("lang_1_to_lang_2_to_map_dist.json")
        self._normalize_in_place(self.map_dist_func)

        # ASP
        # NOTE(security): pickle.load executes arbitrary code from the file.
        # Only ever point this at the trusted asp_dict.pkl shipped with the app.
        with open("asp_dict.pkl", 'rb') as dictfile:
            asp_sim = pickle.load(dictfile)
        lang_list = list(asp_sim.keys())
        # asp_sim[lang] is indexed by the position of the other language in
        # lang_list. The measure is symmetric, so each language only stores
        # the languages that come after it in that list.
        self.asp_dist_func = {}
        for position, lang_1 in enumerate(lang_list):
            first_later = position + 1
            self.asp_dist_func[lang_1] = {
                lang_2: 1 - asp_sim[lang_1][index]
                for index, lang_2 in enumerate(lang_list[first_later:], start=first_later)
            }

    @staticmethod
    def _normalize_in_place(table):
        """Divide every value of a nested distance dict by the largest value in it."""
        largest = max((value for row in table.values() for value in row.values()), default=0.0)
        if largest <= 0:
            # Nothing to scale (empty or all-zero table); dividing would
            # raise ZeroDivisionError.
            return
        for row in table.values():
            for key in row:
                row[key] = row[key] / largest

    @staticmethod
    def _symmetric_lookup(table, l1, l2):
        """Return ``table[l1][l2]`` if present, otherwise ``table[l2][l1]``.

        Raises:
            KeyError: if the pair is stored in neither orientation.
        """
        row = table.get(l1)
        if row is not None and l2 in row:
            return row[l2]
        return table[l2][l1]

    @staticmethod
    def _iso_code(label):
        """Extract the code from a dropdown label such as ``"English (eng)"``."""
        return label.split(" ")[-1].split("(")[1].split(")")[0]

    def get_dists(self, l1, l2):
        """Return the distances between two language codes.

        Args:
            l1: Code of the first language, as used for the table keys.
            l2: Code of the second language.

        Returns:
            A tuple ``(tree_dist, map_dist, asp_dist, learned_dist)``.

        Raises:
            KeyError: if the tree, map or learned table contains the pair in
                neither orientation.
        """
        tree_dist = self._symmetric_lookup(self.tree_dist_func, l1, l2)
        map_dist = self._symmetric_lookup(self.map_dist_func, l1, l2)
        try:
            asp_dist = self._symmetric_lookup(self.asp_dist_func, l1, l2)
        except KeyError:
            asp_dist = tree_dist  # dirty hack, but like 4 codes are not part of phonepiece
        learned_dist = self._symmetric_lookup(self.learned_dist_func, l1, l2)
        return tree_dist, map_dist, asp_dist, learned_dist

    def measure(self, l1, l2):
        """Plot all four distances between two dropdown selections.

        Args:
            l1: First selection, formatted like ``"English (eng)"``.
            l2: Second selection, in the same format.

        Returns:
            A tuple of four figures: tree, map, phoneme-frequency and
            machine-learned distance, in that order.
        """
        if l1 == l2:
            # Identical selections are at distance zero; no table lookup needed.
            dists = (0.0, 0.0, 0.0, 0.0)
        else:
            dists = self.get_dists(self._iso_code(l1), self._iso_code(l2))
        titles = (
            f"Language Family Tree Distance between {l1} and {l2}",
            f"Distance on the Globe between {l1} and {l2}",
            f"Phoneme-Frequency Distance between {l1} and {l2}",
            f"Machine-Learned Distance between {l1} and {l2}",
        )
        return tuple(plot_scalar_on_scale(dist, title) for dist, title in zip(dists, titles))
# Build the lookup tables once at start-up; every request reuses them.
m = Measurer()
iso_to_name = load_json_from_path("iso_to_fullname.json")
# Dropdown entries look like "English (eng)"; Measurer.measure reads the code
# back out of the trailing parentheses.
text_selection = [f"{full_name} ({iso_code})" for iso_code, full_name in iso_to_name.items()]

iface = gr.Interface(
    fn=m.measure,
    inputs=[
        gr.Dropdown(text_selection,
                    type="value",
                    value='English (eng)',
                    label="Select the first Language (type on your keyboard to find it quickly)"),
        gr.Dropdown(text_selection,
                    type="value",
                    value='German (deu)',
                    label="Select the second Language (type on your keyboard to find it quickly)"),
    ],
    # One plot per distance measure, in the order returned by Measurer.measure.
    outputs=[gr.Plot(label="", show_label=False, format="png", container=True) for _ in range(4)],
    description="<br><br> This demo allows you to view the distance between two languages from the ISO 639-3 list according to several distance measurement functions. "
                "For more information, check out our paper: https://arxiv.org/abs/2406.06403 and our text-to-speech tool, in which we make use of "
                "this technique: https://github.com/DigitalPhonetics/IMS-Toucan <br><br>",
)

# Only start the server when run as a script, so importing this module
# (e.g. for testing) does not launch the app.
if __name__ == "__main__":
    iface.launch()