""" uv venv -p3.11 uv pip install phonikud-onnx dicta-onnx nakdimon-onnx pandas git+https://github.com/thewh1teagle/phonikud wget https://huggingface.co/thewh1teagle/phonikud-onnx/resolve/main/phonikud-1.0.int8.onnx wget https://github.com/thewh1teagle/dicta-onnx/releases/download/model-files-v1.0/dicta-1.0.int8.onnx wget https://github.com/thewh1teagle/nakdimon-onnx/releases/download/v0.1.0/nakdimon.onnx """ import csv import pandas as pd from tqdm import tqdm from dicta_onnx import Dicta from phonikud_onnx import Phonikud from nakdimon_onnx import Nakdimon import phonikud tqdm.pandas() # Load unvocalized data once df = pd.read_csv('study_unvocalized.csv', sep='|', names=['k', 'v']) # Initialize models nakdimon = Nakdimon("nakdimon.onnx") dicta = Dicta('./dicta-1.0.int8.onnx') phonikud_model = Phonikud('./phonikud-1.0.int8.onnx') # Apply Dicta diacritics df['dicta_vocalized'] = df['v'].progress_apply(lambda sentence: dicta.add_diacritics(sentence, mark_matres_lectionis='')) # Apply Phonikud diacritics df['phonikud_vocalized'] = df['v'].progress_apply(phonikud_model.add_diacritics) df['phonikud_phonemes'] = df['phonikud_vocalized'].progress_apply(phonikud.phonemize) # Apply Nakdimon diacritics df['nakdimon_vocalized'] = df['v'].progress_apply(nakdimon.compute) # Save outputs df[['k', 'dicta_vocalized']].to_csv('study_dicta.csv', sep='|', index=False, header=False) df[['k', 'nakdimon_vocalized']].to_csv('study_nakdimon.csv', sep='|', index=False, header=False) df[['k', 'phonikud_vocalized', 'phonikud_phonemes']].to_csv('study_phonikud.csv', sep=',', index=False, header=False)