Spaces:
Sleeping
Sleeping
Adding all sample audios
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .DS_Store +0 -0
- app.py +1 -1
- sample_audios2/.DS_Store +0 -0
- sample_audios2/get_sample_audios.py +46 -0
- sample_audios2/hausa/20240112000734-65-1911-479965-ijie-yara-a-gidan-masun-kulla.wav +0 -0
- sample_audios2/hausa/20240112081950-157-1376-295388-amaryar-ta-sa-lalle-jiya.wav +0 -0
- sample_audios2/hausa/20240112112416-64-1950-482478-ka-ba-ni-giya-biyar-halitta.wav +0 -0
- sample_audios2/hausa/20240112125638-67-1866-469130-yaron-da-aka-reno-da-kyau-shin.wav +0 -0
- sample_audios2/hausa/20240112131726-67-1866-469159-ana-saka-agogo-ne-a-dakin-bacc.wav +0 -0
- sample_audios2/hausa/20240112134018-67-1866-469188-wani-kansila-ya-ginawa-wata-kw.wav +0 -0
- sample_audios2/hausa/20240116152848-157-1281-272584-a-arewa-mata-na-wankan-ruwan-z.wav +0 -0
- sample_audios2/hausa/20240117082346-157-1281-272642-a-da-famfas-ba-tsada.wav +0 -0
- sample_audios2/hausa/20240117134055-67-1882-472839-za-ka-siyo-sabuwar-motar-kuwa.wav +0 -0
- sample_audios2/hausa/20240121115511-55-2438-518642-wane-irin-abun-sha-kuke-da-su.wav +0 -0
- sample_audios2/hausa/20240121172900-66-2293-488230-nace-maka-ka-tura-yaron-nan-ma.wav +0 -0
- sample_audios2/hausa/20240204121303-161-2398-513605-akwai-wurin-sauka-da-tashin-ji.wav +0 -0
- sample_audios2/hausa/20240213144054-80-2528-633509-tsaftace-kicin-sau-aya-a-mako.wav +0 -0
- sample_audios2/hausa/20240214093423-162-1310-279512-ta-o-ja-ce.wav +0 -0
- sample_audios2/hausa/20240218162107-154-2563-668567-a-shekara-ta-dubu-biyu-da-tala.wav +0 -0
- sample_audios2/hausa/20240302083136-164-1320-281849-kawuna-ya-kashe-maku-an-ku-i-w.wav +0 -0
- sample_audios2/hausa/20240304141707-164-2408-515797-tugo-ya-zagaya-yanar-gizo-sosa.wav +0 -0
- sample_audios2/hausa/20240321193009-269-5308-1073284-e-ma-aikatan-na-da-ha-in-kai.wav +0 -0
- sample_audios2/hausa/20240321221217-266-5245-1058041-akwai-ban-aki-a-kowane-cikin-j.wav +0 -0
- sample_audios2/hausa/20240322182009-259-5053-1011949-fesbuk-na-amfani-da-data.wav +0 -0
- sample_audios2/hausa/20240322230610-260-5076-1017618-tun-kafin-ya-yi-aure-yake-addu.wav +0 -0
- sample_audios2/hausa/20240323072835-270-5336-1079890-bala-ya-bude-wani-shagon-sayar.wav +0 -0
- sample_audios2/hausa/20240328153436-263-5788-1398462-kuna-son-lokacin-da-abokin-tar.wav +0 -0
- sample_audios2/hausa/20240328155606-277-5936-1433978-lala-na-can-zata-sayi-ragon-su.wav +0 -0
- sample_audios2/hausa/20240329120557-270-5866-1417132-jeka-ka-gina-gidan-ka.wav +0 -0
- sample_audios2/hausa/20240417183811-265-5220-1052239-mata-da-miji-sukan-samu-sa-ani.wav +0 -0
- sample_audios2/igbo/20231219164207-47-289-65711-a-ga-m-emepe-ahi-a-ngbanwe-ego.wav +0 -0
- sample_audios2/igbo/20231226194928-42-597-162804-onye-nche-l-akwkw-any-nwe.wav +0 -0
- sample_audios2/igbo/20231227230415-25-232-51915-n-oge-gara-aga-ike-isi-uru-nk.wav +0 -0
- sample_audios2/igbo/20231231070219-8-472-156842-kedu-ihe--mere-iji-nweta-nsona.wav +0 -0
- sample_audios2/igbo/20240107182227-52-1799-376695-ee-m-ga-akwado-pati-lp.wav +0 -0
- sample_audios2/igbo/20240112075728-18-165-35795-i-ga-ara-mgbimgbi-ahu-echi.wav +0 -0
- sample_audios2/igbo/20240112090202-18-165-35812-nna-m-ji-hama-otu-n-ime-ngwaor.wav +0 -0
- sample_audios2/igbo/20240112102109-18-165-35910-i-na-akoro-ha-akuko-ifo.wav +0 -0
- sample_audios2/igbo/20240112110354-33-349-80086-i-choro-kaadi-ntuli-aka.wav +0 -0
- sample_audios2/igbo/20240113124748-22-220-49131-o-nweghi-i-ke-iricha-piza-o-zu.wav +0 -0
- sample_audios2/igbo/20240114072828-107-1938-531871-a-na-amu-asusu-o-bula-amu-tupu.wav +0 -0
- sample_audios2/igbo/20240122135631-34-2029-550094-goomenti-kwuputara-ezumike-oha.wav +0 -0
- sample_audios2/igbo/20240131131539-49-2001-543334-mba-agaghi-m-aga-ogbako-ndoron.wav +0 -0
- sample_audios2/igbo/20240203145741-28-909-190510-ikpe-igbu-mmadu-na-adota-ntara.wav +0 -0
- sample_audios2/igbo/20240215234709-106-1125-341760-n-otutu-obodo-otutu-n-ime-ulo.wav +0 -0
- sample_audios2/igbo/20240229104336-109-1132-343338-ada-chiri-nwa-enwe-n-elu-ka-at.wav +0 -0
- sample_audios2/igbo/20240301110009-22-1977-537514-aga-m-agwa-ya-okwu-echi.wav +0 -0
- sample_audios2/igbo/20240301130259-42-1857-525921-adaku-chefuru-inye-okwa-akwa-u.wav +0 -0
- sample_audios2/igbo/20240308165214-101-1076-330019-ebere-na-eri-nri-n-ite.wav +0 -0
- sample_audios2/igbo/20240323005358-188-3285-1229656-ha-ga-aru-shopin-molu-n-odinih.wav +0 -0
.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
app.py
CHANGED
|
@@ -8,7 +8,7 @@ import csv
|
|
| 8 |
st.header("Practice Section")
|
| 9 |
|
| 10 |
# Replace this with the directory containing the sample audio files
|
| 11 |
-
PRACTICE_AUDIO_DIR = '
|
| 12 |
|
| 13 |
# Replace this with the excel file (converted to csv) that contains the answers.
|
| 14 |
practice_data = pd.read_csv('practice-quiz.csv')
|
|
|
|
| 8 |
st.header("Practice Section")
|
| 9 |
|
| 10 |
# Replace this with the directory containing the sample audio files
|
| 11 |
+
PRACTICE_AUDIO_DIR = 'audio/'
|
| 12 |
|
| 13 |
# Replace this with the excel file (converted to csv) that contains the answers.
|
| 14 |
practice_data = pd.read_csv('practice-quiz.csv')
|
sample_audios2/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
sample_audios2/get_sample_audios.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import json
|
| 3 |
+
import os
|
| 4 |
+
from subprocess import call
|
| 5 |
+
from tqdm import tqdm
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
# CSV that is read into df_train below; the code in this script uses its
# 'language', 'audio' and 'text' columns.
train_path = '/home/mila/c/chris.emezue/naijavoices-research/abraham/data/train.csv'
|
| 9 |
+
# Directory that each row's 'audio' value is joined onto to form the source path.
AUDIO_DIR = '/home/mila/c/chris.emezue/scratch/naijavoices-data/naijavoices-dataset-compressed/audio-unconverted'
|
| 10 |
+
# Root folder for the copied samples; copy_files appends the language name to it.
DESTINATION_AUDIO_PATH = '/home/mila/c/chris.emezue/naijavoices-research/sample_audios'
|
| 11 |
+
# NOTE(review): this runs at module top level, so the CSV is read as soon as the
# script is executed or imported. All three paths above are absolute paths under
# one user's home directory.
df_train = pd.read_csv(train_path)
|
| 12 |
+
|
| 13 |
+
def get_sample_audios_per_language(df, language: str, n_sample: int = 30, random_state=None):
    """Randomly pick up to ``n_sample`` recordings of a single language.

    Args:
        df: DataFrame with at least the columns ``language``, ``audio``
            and ``text``.
        language: Value of the ``language`` column to keep (e.g. ``'igbo'``).
        n_sample: Maximum number of rows to draw. When the language has
            fewer rows than this, every available row is returned instead
            of raising.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so a
            selection can be reproduced. ``None`` (the default) keeps the
            original non-deterministic behaviour.

    Returns:
        A list of ``(audio_path, transcript, language)`` tuples, where
        ``audio_path`` is the row's ``audio`` value joined onto ``AUDIO_DIR``.
    """
    df_language = df[df['language'] == language]

    # DataFrame.sample draws without replacement by default and raises
    # ValueError when n exceeds the number of rows, so cap the request.
    n_rows = min(n_sample, len(df_language))
    df_sampled = df_language.sample(n=n_rows, random_state=random_state)

    audio_files = df_sampled['audio'].values.tolist()
    transcripts = df_sampled['text'].values.tolist()
    return [
        (os.path.join(AUDIO_DIR, audio_file), transcript, language)
        for audio_file, transcript in zip(audio_files, transcripts)
    ]
|
| 20 |
+
|
| 21 |
+
# Draw a default-sized random sample for each of the three languages.
ig_samples = get_sample_audios_per_language(df_train, 'igbo')
yo_samples = get_sample_audios_per_language(df_train, 'yoruba')
ha_samples = get_sample_audios_per_language(df_train, 'hausa')

# One flat list of (audio_path, transcript, language) tuples for the copy
# step and the CSV manifest. The leftover debugging breakpoint() that used
# to follow this line was removed: it stopped every run in the debugger.
all_samples = ig_samples + yo_samples + ha_samples
|
| 28 |
+
def copy_files(file, transcript, language):
    """Copy one audio file into the destination folder of its language.

    The target is ``DESTINATION_AUDIO_PATH/<language>/<basename of file>``.
    The language folder is created when it does not exist yet.

    Args:
        file: Path of the source audio file.
        transcript: Unused here; kept so existing callers that pass the
            whole sample tuple keep working.
        language: Name of the sub-folder the file is copied into.

    Returns:
        None. A failed copy is reported on stderr and does not raise, so one
        bad file does not abort a whole batch.
    """
    # Local imports: the file-level import block does not provide these.
    import shutil
    import sys

    audio_path_copy_to = os.path.join(DESTINATION_AUDIO_PATH, language)
    copy_to_audio_path = os.path.join(audio_path_copy_to, os.path.basename(file))
    try:
        os.makedirs(audio_path_copy_to, exist_ok=True)
        # shutil.copy replaces the external `cp` call: no dependency on a
        # Unix tool being present, and failures surface as OSError instead
        # of an ignored exit status.
        shutil.copy(file, copy_to_audio_path)
    except OSError as err:
        print(f'Could not copy {file} to {copy_to_audio_path}: {err}', file=sys.stderr)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
# Copy every sampled recording into its language folder, with a progress bar.
for audio_path, transcript, language in tqdm(all_samples, desc='Copying files...'):
    copy_files(audio_path, transcript, language)

# Write a manifest of the samples: file name (without directory),
# transcript and language, one row per recording.
df = pd.DataFrame({
    'audio': [os.path.basename(path) for path, _, _ in all_samples],
    'transcript': [text for _, text, _ in all_samples],
    'language': [lang for _, _, lang in all_samples],
})
df.to_csv('sample_audios.csv', index=False)
|
sample_audios2/hausa/20240112000734-65-1911-479965-ijie-yara-a-gidan-masun-kulla.wav
ADDED
|
Binary file (78.9 kB). View file
|
|
|
sample_audios2/hausa/20240112081950-157-1376-295388-amaryar-ta-sa-lalle-jiya.wav
ADDED
|
Binary file (32.3 kB). View file
|
|
|
sample_audios2/hausa/20240112112416-64-1950-482478-ka-ba-ni-giya-biyar-halitta.wav
ADDED
|
Binary file (34.3 kB). View file
|
|
|
sample_audios2/hausa/20240112125638-67-1866-469130-yaron-da-aka-reno-da-kyau-shin.wav
ADDED
|
Binary file (39.9 kB). View file
|
|
|
sample_audios2/hausa/20240112131726-67-1866-469159-ana-saka-agogo-ne-a-dakin-bacc.wav
ADDED
|
Binary file (20.2 kB). View file
|
|
|
sample_audios2/hausa/20240112134018-67-1866-469188-wani-kansila-ya-ginawa-wata-kw.wav
ADDED
|
Binary file (30.1 kB). View file
|
|
|
sample_audios2/hausa/20240116152848-157-1281-272584-a-arewa-mata-na-wankan-ruwan-z.wav
ADDED
|
Binary file (23.4 kB). View file
|
|
|
sample_audios2/hausa/20240117082346-157-1281-272642-a-da-famfas-ba-tsada.wav
ADDED
|
Binary file (9.99 kB). View file
|
|
|
sample_audios2/hausa/20240117134055-67-1882-472839-za-ka-siyo-sabuwar-motar-kuwa.wav
ADDED
|
Binary file (28.6 kB). View file
|
|
|
sample_audios2/hausa/20240121115511-55-2438-518642-wane-irin-abun-sha-kuke-da-su.wav
ADDED
|
Binary file (102 kB). View file
|
|
|
sample_audios2/hausa/20240121172900-66-2293-488230-nace-maka-ka-tura-yaron-nan-ma.wav
ADDED
|
Binary file (20.6 kB). View file
|
|
|
sample_audios2/hausa/20240204121303-161-2398-513605-akwai-wurin-sauka-da-tashin-ji.wav
ADDED
|
Binary file (25.9 kB). View file
|
|
|
sample_audios2/hausa/20240213144054-80-2528-633509-tsaftace-kicin-sau-aya-a-mako.wav
ADDED
|
Binary file (42 kB). View file
|
|
|
sample_audios2/hausa/20240214093423-162-1310-279512-ta-o-ja-ce.wav
ADDED
|
Binary file (7.31 kB). View file
|
|
|
sample_audios2/hausa/20240218162107-154-2563-668567-a-shekara-ta-dubu-biyu-da-tala.wav
ADDED
|
Binary file (43 kB). View file
|
|
|
sample_audios2/hausa/20240302083136-164-1320-281849-kawuna-ya-kashe-maku-an-ku-i-w.wav
ADDED
|
Binary file (41 kB). View file
|
|
|
sample_audios2/hausa/20240304141707-164-2408-515797-tugo-ya-zagaya-yanar-gizo-sosa.wav
ADDED
|
Binary file (52.7 kB). View file
|
|
|
sample_audios2/hausa/20240321193009-269-5308-1073284-e-ma-aikatan-na-da-ha-in-kai.wav
ADDED
|
Binary file (12.3 kB). View file
|
|
|
sample_audios2/hausa/20240321221217-266-5245-1058041-akwai-ban-aki-a-kowane-cikin-j.wav
ADDED
|
Binary file (28.5 kB). View file
|
|
|
sample_audios2/hausa/20240322182009-259-5053-1011949-fesbuk-na-amfani-da-data.wav
ADDED
|
Binary file (44.3 kB). View file
|
|
|
sample_audios2/hausa/20240322230610-260-5076-1017618-tun-kafin-ya-yi-aure-yake-addu.wav
ADDED
|
Binary file (67.6 kB). View file
|
|
|
sample_audios2/hausa/20240323072835-270-5336-1079890-bala-ya-bude-wani-shagon-sayar.wav
ADDED
|
Binary file (48.1 kB). View file
|
|
|
sample_audios2/hausa/20240328153436-263-5788-1398462-kuna-son-lokacin-da-abokin-tar.wav
ADDED
|
Binary file (21.4 kB). View file
|
|
|
sample_audios2/hausa/20240328155606-277-5936-1433978-lala-na-can-zata-sayi-ragon-su.wav
ADDED
|
Binary file (59.5 kB). View file
|
|
|
sample_audios2/hausa/20240329120557-270-5866-1417132-jeka-ka-gina-gidan-ka.wav
ADDED
|
Binary file (30.8 kB). View file
|
|
|
sample_audios2/hausa/20240417183811-265-5220-1052239-mata-da-miji-sukan-samu-sa-ani.wav
ADDED
|
Binary file (24.9 kB). View file
|
|
|
sample_audios2/igbo/20231219164207-47-289-65711-a-ga-m-emepe-ahi-a-ngbanwe-ego.wav
ADDED
|
Binary file (21 kB). View file
|
|
|
sample_audios2/igbo/20231226194928-42-597-162804-onye-nche-l-akwkw-any-nwe.wav
ADDED
|
Binary file (35.5 kB). View file
|
|
|
sample_audios2/igbo/20231227230415-25-232-51915-n-oge-gara-aga-ike-isi-uru-nk.wav
ADDED
|
Binary file (44.3 kB). View file
|
|
|
sample_audios2/igbo/20231231070219-8-472-156842-kedu-ihe--mere-iji-nweta-nsona.wav
ADDED
|
Binary file (31.2 kB). View file
|
|
|
sample_audios2/igbo/20240107182227-52-1799-376695-ee-m-ga-akwado-pati-lp.wav
ADDED
|
Binary file (35.2 kB). View file
|
|
|
sample_audios2/igbo/20240112075728-18-165-35795-i-ga-ara-mgbimgbi-ahu-echi.wav
ADDED
|
Binary file (14.8 kB). View file
|
|
|
sample_audios2/igbo/20240112090202-18-165-35812-nna-m-ji-hama-otu-n-ime-ngwaor.wav
ADDED
|
Binary file (36.7 kB). View file
|
|
|
sample_audios2/igbo/20240112102109-18-165-35910-i-na-akoro-ha-akuko-ifo.wav
ADDED
|
Binary file (15.5 kB). View file
|
|
|
sample_audios2/igbo/20240112110354-33-349-80086-i-choro-kaadi-ntuli-aka.wav
ADDED
|
Binary file (32.2 kB). View file
|
|
|
sample_audios2/igbo/20240113124748-22-220-49131-o-nweghi-i-ke-iricha-piza-o-zu.wav
ADDED
|
Binary file (59.5 kB). View file
|
|
|
sample_audios2/igbo/20240114072828-107-1938-531871-a-na-amu-asusu-o-bula-amu-tupu.wav
ADDED
|
Binary file (20.6 kB). View file
|
|
|
sample_audios2/igbo/20240122135631-34-2029-550094-goomenti-kwuputara-ezumike-oha.wav
ADDED
|
Binary file (39.2 kB). View file
|
|
|
sample_audios2/igbo/20240131131539-49-2001-543334-mba-agaghi-m-aga-ogbako-ndoron.wav
ADDED
|
Binary file (22.6 kB). View file
|
|
|
sample_audios2/igbo/20240203145741-28-909-190510-ikpe-igbu-mmadu-na-adota-ntara.wav
ADDED
|
Binary file (34.3 kB). View file
|
|
|
sample_audios2/igbo/20240215234709-106-1125-341760-n-otutu-obodo-otutu-n-ime-ulo.wav
ADDED
|
Binary file (72.1 kB). View file
|
|
|
sample_audios2/igbo/20240229104336-109-1132-343338-ada-chiri-nwa-enwe-n-elu-ka-at.wav
ADDED
|
Binary file (33.2 kB). View file
|
|
|
sample_audios2/igbo/20240301110009-22-1977-537514-aga-m-agwa-ya-okwu-echi.wav
ADDED
|
Binary file (35.2 kB). View file
|
|
|
sample_audios2/igbo/20240301130259-42-1857-525921-adaku-chefuru-inye-okwa-akwa-u.wav
ADDED
|
Binary file (43.3 kB). View file
|
|
|
sample_audios2/igbo/20240308165214-101-1076-330019-ebere-na-eri-nri-n-ite.wav
ADDED
|
Binary file (34 kB). View file
|
|
|
sample_audios2/igbo/20240323005358-188-3285-1229656-ha-ga-aru-shopin-molu-n-odinih.wav
ADDED
|
Binary file (17.8 kB). View file
|
|
|