bumie-e committed on
Commit
5a0849d
·
1 Parent(s): 9b8cc7f

Adding all sample audios

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .DS_Store +0 -0
  2. app.py +1 -1
  3. sample_audios2/.DS_Store +0 -0
  4. sample_audios2/get_sample_audios.py +46 -0
  5. sample_audios2/hausa/20240112000734-65-1911-479965-ijie-yara-a-gidan-masun-kulla.wav +0 -0
  6. sample_audios2/hausa/20240112081950-157-1376-295388-amaryar-ta-sa-lalle-jiya.wav +0 -0
  7. sample_audios2/hausa/20240112112416-64-1950-482478-ka-ba-ni-giya-biyar-halitta.wav +0 -0
  8. sample_audios2/hausa/20240112125638-67-1866-469130-yaron-da-aka-reno-da-kyau-shin.wav +0 -0
  9. sample_audios2/hausa/20240112131726-67-1866-469159-ana-saka-agogo-ne-a-dakin-bacc.wav +0 -0
  10. sample_audios2/hausa/20240112134018-67-1866-469188-wani-kansila-ya-ginawa-wata-kw.wav +0 -0
  11. sample_audios2/hausa/20240116152848-157-1281-272584-a-arewa-mata-na-wankan-ruwan-z.wav +0 -0
  12. sample_audios2/hausa/20240117082346-157-1281-272642-a-da-famfas-ba-tsada.wav +0 -0
  13. sample_audios2/hausa/20240117134055-67-1882-472839-za-ka-siyo-sabuwar-motar-kuwa.wav +0 -0
  14. sample_audios2/hausa/20240121115511-55-2438-518642-wane-irin-abun-sha-kuke-da-su.wav +0 -0
  15. sample_audios2/hausa/20240121172900-66-2293-488230-nace-maka-ka-tura-yaron-nan-ma.wav +0 -0
  16. sample_audios2/hausa/20240204121303-161-2398-513605-akwai-wurin-sauka-da-tashin-ji.wav +0 -0
  17. sample_audios2/hausa/20240213144054-80-2528-633509-tsaftace-kicin-sau-aya-a-mako.wav +0 -0
  18. sample_audios2/hausa/20240214093423-162-1310-279512-ta-o-ja-ce.wav +0 -0
  19. sample_audios2/hausa/20240218162107-154-2563-668567-a-shekara-ta-dubu-biyu-da-tala.wav +0 -0
  20. sample_audios2/hausa/20240302083136-164-1320-281849-kawuna-ya-kashe-maku-an-ku-i-w.wav +0 -0
  21. sample_audios2/hausa/20240304141707-164-2408-515797-tugo-ya-zagaya-yanar-gizo-sosa.wav +0 -0
  22. sample_audios2/hausa/20240321193009-269-5308-1073284-e-ma-aikatan-na-da-ha-in-kai.wav +0 -0
  23. sample_audios2/hausa/20240321221217-266-5245-1058041-akwai-ban-aki-a-kowane-cikin-j.wav +0 -0
  24. sample_audios2/hausa/20240322182009-259-5053-1011949-fesbuk-na-amfani-da-data.wav +0 -0
  25. sample_audios2/hausa/20240322230610-260-5076-1017618-tun-kafin-ya-yi-aure-yake-addu.wav +0 -0
  26. sample_audios2/hausa/20240323072835-270-5336-1079890-bala-ya-bude-wani-shagon-sayar.wav +0 -0
  27. sample_audios2/hausa/20240328153436-263-5788-1398462-kuna-son-lokacin-da-abokin-tar.wav +0 -0
  28. sample_audios2/hausa/20240328155606-277-5936-1433978-lala-na-can-zata-sayi-ragon-su.wav +0 -0
  29. sample_audios2/hausa/20240329120557-270-5866-1417132-jeka-ka-gina-gidan-ka.wav +0 -0
  30. sample_audios2/hausa/20240417183811-265-5220-1052239-mata-da-miji-sukan-samu-sa-ani.wav +0 -0
  31. sample_audios2/igbo/20231219164207-47-289-65711-a-ga-m-emepe-ahi-a-ngbanwe-ego.wav +0 -0
  32. sample_audios2/igbo/20231226194928-42-597-162804-onye-nche-l-akwkw-any-nwe.wav +0 -0
  33. sample_audios2/igbo/20231227230415-25-232-51915-n-oge-gara-aga-ike-isi-uru-nk.wav +0 -0
  34. sample_audios2/igbo/20231231070219-8-472-156842-kedu-ihe--mere-iji-nweta-nsona.wav +0 -0
  35. sample_audios2/igbo/20240107182227-52-1799-376695-ee-m-ga-akwado-pati-lp.wav +0 -0
  36. sample_audios2/igbo/20240112075728-18-165-35795-i-ga-ara-mgbimgbi-ahu-echi.wav +0 -0
  37. sample_audios2/igbo/20240112090202-18-165-35812-nna-m-ji-hama-otu-n-ime-ngwaor.wav +0 -0
  38. sample_audios2/igbo/20240112102109-18-165-35910-i-na-akoro-ha-akuko-ifo.wav +0 -0
  39. sample_audios2/igbo/20240112110354-33-349-80086-i-choro-kaadi-ntuli-aka.wav +0 -0
  40. sample_audios2/igbo/20240113124748-22-220-49131-o-nweghi-i-ke-iricha-piza-o-zu.wav +0 -0
  41. sample_audios2/igbo/20240114072828-107-1938-531871-a-na-amu-asusu-o-bula-amu-tupu.wav +0 -0
  42. sample_audios2/igbo/20240122135631-34-2029-550094-goomenti-kwuputara-ezumike-oha.wav +0 -0
  43. sample_audios2/igbo/20240131131539-49-2001-543334-mba-agaghi-m-aga-ogbako-ndoron.wav +0 -0
  44. sample_audios2/igbo/20240203145741-28-909-190510-ikpe-igbu-mmadu-na-adota-ntara.wav +0 -0
  45. sample_audios2/igbo/20240215234709-106-1125-341760-n-otutu-obodo-otutu-n-ime-ulo.wav +0 -0
  46. sample_audios2/igbo/20240229104336-109-1132-343338-ada-chiri-nwa-enwe-n-elu-ka-at.wav +0 -0
  47. sample_audios2/igbo/20240301110009-22-1977-537514-aga-m-agwa-ya-okwu-echi.wav +0 -0
  48. sample_audios2/igbo/20240301130259-42-1857-525921-adaku-chefuru-inye-okwa-akwa-u.wav +0 -0
  49. sample_audios2/igbo/20240308165214-101-1076-330019-ebere-na-eri-nri-n-ite.wav +0 -0
  50. sample_audios2/igbo/20240323005358-188-3285-1229656-ha-ga-aru-shopin-molu-n-odinih.wav +0 -0
.DS_Store ADDED
Binary file (6.15 kB). View file
 
app.py CHANGED
@@ -8,7 +8,7 @@ import csv
8
  st.header("Practice Section")
9
 
10
  # Replace this with the directory containing the sample audio files
11
- PRACTICE_AUDIO_DIR = './sample_audios2/'
12
 
13
  # Replace this with the excel file (converted to csv) that contains the answers.
14
  practice_data = pd.read_csv('practice-quiz.csv')
 
8
  st.header("Practice Section")
9
 
10
  # Replace this with the directory containing the sample audio files
11
+ PRACTICE_AUDIO_DIR = 'audio/'
12
 
13
  # Replace this with the excel file (converted to csv) that contains the answers.
14
  practice_data = pd.read_csv('practice-quiz.csv')
sample_audios2/.DS_Store ADDED
Binary file (6.15 kB). View file
 
sample_audios2/get_sample_audios.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import json
3
+ import os
4
+ from subprocess import call
5
+ from tqdm import tqdm
6
+
7
+
8
+ train_path = '/home/mila/c/chris.emezue/naijavoices-research/abraham/data/train.csv'
9
+ AUDIO_DIR = '/home/mila/c/chris.emezue/scratch/naijavoices-data/naijavoices-dataset-compressed/audio-unconverted'
10
+ DESTINATION_AUDIO_PATH = '/home/mila/c/chris.emezue/naijavoices-research/sample_audios'
11
+ df_train = pd.read_csv(train_path)
12
+
13
def get_sample_audios_per_language(df, language: str, n_sample: int = 30,
                                   audio_dir=None, random_state=None):
    """Randomly pick up to ``n_sample`` recordings of one language.

    Args:
        df: DataFrame with at least the columns ``language``, ``audio`` and
            ``text``.
        language: Value of the ``language`` column to filter on.
        n_sample: Maximum number of rows to draw. When fewer rows exist for
            the language, all of them are returned instead of raising.
        audio_dir: Directory prepended to each ``audio`` value. Defaults to
            the module-level ``AUDIO_DIR``.
        random_state: Forwarded to ``DataFrame.sample``; pass a seed to get a
            reproducible draw.

    Returns:
        A list of ``(audio_path, transcript, language)`` tuples.
    """
    if audio_dir is None:
        audio_dir = AUDIO_DIR

    df_language = df[df['language'] == language]
    # DataFrame.sample raises ValueError when n exceeds the number of rows
    # (sampling without replacement), so cap the request at what is available.
    n_rows = min(n_sample, len(df_language))
    df_sampled = df_language.sample(n=n_rows, random_state=random_state)

    audio_files = df_sampled['audio'].tolist()
    transcripts = df_sampled['text'].tolist()
    return [
        (os.path.join(audio_dir, audio_file), transcript, language)
        for audio_file, transcript in zip(audio_files, transcripts)
    ]
20
+
21
# Draw a random sample for each language (the helper's default sample size
# applies) and merge them into one list of (audio_path, transcript, language).
ig_samples = get_sample_audios_per_language(df_train, 'igbo')
yo_samples = get_sample_audios_per_language(df_train, 'yoruba')
ha_samples = get_sample_audios_per_language(df_train, 'hausa')

all_samples = ig_samples + yo_samples + ha_samples
28
def copy_files(file, transcript, language, destination_root=None):
    """Copy one audio file into the sub-directory for its language.

    Args:
        file: Path of the source audio file.
        transcript: Transcript of the recording. Unused here; kept so the
            existing call signature stays valid.
        language: Name of the sub-directory (under the destination root) that
            receives the copy.
        destination_root: Root directory for the copies. Defaults to the
            module-level ``DESTINATION_AUDIO_PATH``.

    Returns:
        The path of the copied file, or ``None`` when the copy failed.
    """
    import shutil
    import sys

    if destination_root is None:
        destination_root = DESTINATION_AUDIO_PATH

    audio_path_copy_to = os.path.join(destination_root, language)
    copy_to_audio_path = os.path.join(audio_path_copy_to, os.path.basename(file))
    try:
        # The per-language directory may not exist yet; without it every copy
        # into that directory fails.
        os.makedirs(audio_path_copy_to, exist_ok=True)
        # shutil.copy replaces the external `cp` call: it is portable and
        # reports failures as exceptions instead of an ignored exit status.
        shutil.copy(file, copy_to_audio_path)
    except OSError as err:
        # Stay best-effort like the original (a failed `cp` did not stop the
        # run), but make the failure visible.
        print(f'Could not copy {file} to {copy_to_audio_path}: {err}', file=sys.stderr)
        return None
    return copy_to_audio_path
34
+
35
+
36
# Copy every sampled recording into its language folder.
for audio_file, transcript, language in tqdm(all_samples, desc='Copying files...'):
    copy_files(audio_file, transcript, language)

# Build the manifest columns: file name only (no directory), transcript, language.
audio_names = [os.path.basename(path) for path, _, _ in all_samples]
texts = [text for _, text, _ in all_samples]
langs = [lang for _, _, lang in all_samples]

# Write the manifest next to wherever the script is run from.
manifest = {'audio': audio_names, 'transcript': texts, 'language': langs}
df = pd.DataFrame(manifest)
df.to_csv('sample_audios.csv', index=False)
sample_audios2/hausa/20240112000734-65-1911-479965-ijie-yara-a-gidan-masun-kulla.wav ADDED
Binary file (78.9 kB). View file
 
sample_audios2/hausa/20240112081950-157-1376-295388-amaryar-ta-sa-lalle-jiya.wav ADDED
Binary file (32.3 kB). View file
 
sample_audios2/hausa/20240112112416-64-1950-482478-ka-ba-ni-giya-biyar-halitta.wav ADDED
Binary file (34.3 kB). View file
 
sample_audios2/hausa/20240112125638-67-1866-469130-yaron-da-aka-reno-da-kyau-shin.wav ADDED
Binary file (39.9 kB). View file
 
sample_audios2/hausa/20240112131726-67-1866-469159-ana-saka-agogo-ne-a-dakin-bacc.wav ADDED
Binary file (20.2 kB). View file
 
sample_audios2/hausa/20240112134018-67-1866-469188-wani-kansila-ya-ginawa-wata-kw.wav ADDED
Binary file (30.1 kB). View file
 
sample_audios2/hausa/20240116152848-157-1281-272584-a-arewa-mata-na-wankan-ruwan-z.wav ADDED
Binary file (23.4 kB). View file
 
sample_audios2/hausa/20240117082346-157-1281-272642-a-da-famfas-ba-tsada.wav ADDED
Binary file (9.99 kB). View file
 
sample_audios2/hausa/20240117134055-67-1882-472839-za-ka-siyo-sabuwar-motar-kuwa.wav ADDED
Binary file (28.6 kB). View file
 
sample_audios2/hausa/20240121115511-55-2438-518642-wane-irin-abun-sha-kuke-da-su.wav ADDED
Binary file (102 kB). View file
 
sample_audios2/hausa/20240121172900-66-2293-488230-nace-maka-ka-tura-yaron-nan-ma.wav ADDED
Binary file (20.6 kB). View file
 
sample_audios2/hausa/20240204121303-161-2398-513605-akwai-wurin-sauka-da-tashin-ji.wav ADDED
Binary file (25.9 kB). View file
 
sample_audios2/hausa/20240213144054-80-2528-633509-tsaftace-kicin-sau-aya-a-mako.wav ADDED
Binary file (42 kB). View file
 
sample_audios2/hausa/20240214093423-162-1310-279512-ta-o-ja-ce.wav ADDED
Binary file (7.31 kB). View file
 
sample_audios2/hausa/20240218162107-154-2563-668567-a-shekara-ta-dubu-biyu-da-tala.wav ADDED
Binary file (43 kB). View file
 
sample_audios2/hausa/20240302083136-164-1320-281849-kawuna-ya-kashe-maku-an-ku-i-w.wav ADDED
Binary file (41 kB). View file
 
sample_audios2/hausa/20240304141707-164-2408-515797-tugo-ya-zagaya-yanar-gizo-sosa.wav ADDED
Binary file (52.7 kB). View file
 
sample_audios2/hausa/20240321193009-269-5308-1073284-e-ma-aikatan-na-da-ha-in-kai.wav ADDED
Binary file (12.3 kB). View file
 
sample_audios2/hausa/20240321221217-266-5245-1058041-akwai-ban-aki-a-kowane-cikin-j.wav ADDED
Binary file (28.5 kB). View file
 
sample_audios2/hausa/20240322182009-259-5053-1011949-fesbuk-na-amfani-da-data.wav ADDED
Binary file (44.3 kB). View file
 
sample_audios2/hausa/20240322230610-260-5076-1017618-tun-kafin-ya-yi-aure-yake-addu.wav ADDED
Binary file (67.6 kB). View file
 
sample_audios2/hausa/20240323072835-270-5336-1079890-bala-ya-bude-wani-shagon-sayar.wav ADDED
Binary file (48.1 kB). View file
 
sample_audios2/hausa/20240328153436-263-5788-1398462-kuna-son-lokacin-da-abokin-tar.wav ADDED
Binary file (21.4 kB). View file
 
sample_audios2/hausa/20240328155606-277-5936-1433978-lala-na-can-zata-sayi-ragon-su.wav ADDED
Binary file (59.5 kB). View file
 
sample_audios2/hausa/20240329120557-270-5866-1417132-jeka-ka-gina-gidan-ka.wav ADDED
Binary file (30.8 kB). View file
 
sample_audios2/hausa/20240417183811-265-5220-1052239-mata-da-miji-sukan-samu-sa-ani.wav ADDED
Binary file (24.9 kB). View file
 
sample_audios2/igbo/20231219164207-47-289-65711-a-ga-m-emepe-ahi-a-ngbanwe-ego.wav ADDED
Binary file (21 kB). View file
 
sample_audios2/igbo/20231226194928-42-597-162804-onye-nche-l-akwkw-any-nwe.wav ADDED
Binary file (35.5 kB). View file
 
sample_audios2/igbo/20231227230415-25-232-51915-n-oge-gara-aga-ike-isi-uru-nk.wav ADDED
Binary file (44.3 kB). View file
 
sample_audios2/igbo/20231231070219-8-472-156842-kedu-ihe--mere-iji-nweta-nsona.wav ADDED
Binary file (31.2 kB). View file
 
sample_audios2/igbo/20240107182227-52-1799-376695-ee-m-ga-akwado-pati-lp.wav ADDED
Binary file (35.2 kB). View file
 
sample_audios2/igbo/20240112075728-18-165-35795-i-ga-ara-mgbimgbi-ahu-echi.wav ADDED
Binary file (14.8 kB). View file
 
sample_audios2/igbo/20240112090202-18-165-35812-nna-m-ji-hama-otu-n-ime-ngwaor.wav ADDED
Binary file (36.7 kB). View file
 
sample_audios2/igbo/20240112102109-18-165-35910-i-na-akoro-ha-akuko-ifo.wav ADDED
Binary file (15.5 kB). View file
 
sample_audios2/igbo/20240112110354-33-349-80086-i-choro-kaadi-ntuli-aka.wav ADDED
Binary file (32.2 kB). View file
 
sample_audios2/igbo/20240113124748-22-220-49131-o-nweghi-i-ke-iricha-piza-o-zu.wav ADDED
Binary file (59.5 kB). View file
 
sample_audios2/igbo/20240114072828-107-1938-531871-a-na-amu-asusu-o-bula-amu-tupu.wav ADDED
Binary file (20.6 kB). View file
 
sample_audios2/igbo/20240122135631-34-2029-550094-goomenti-kwuputara-ezumike-oha.wav ADDED
Binary file (39.2 kB). View file
 
sample_audios2/igbo/20240131131539-49-2001-543334-mba-agaghi-m-aga-ogbako-ndoron.wav ADDED
Binary file (22.6 kB). View file
 
sample_audios2/igbo/20240203145741-28-909-190510-ikpe-igbu-mmadu-na-adota-ntara.wav ADDED
Binary file (34.3 kB). View file
 
sample_audios2/igbo/20240215234709-106-1125-341760-n-otutu-obodo-otutu-n-ime-ulo.wav ADDED
Binary file (72.1 kB). View file
 
sample_audios2/igbo/20240229104336-109-1132-343338-ada-chiri-nwa-enwe-n-elu-ka-at.wav ADDED
Binary file (33.2 kB). View file
 
sample_audios2/igbo/20240301110009-22-1977-537514-aga-m-agwa-ya-okwu-echi.wav ADDED
Binary file (35.2 kB). View file
 
sample_audios2/igbo/20240301130259-42-1857-525921-adaku-chefuru-inye-okwa-akwa-u.wav ADDED
Binary file (43.3 kB). View file
 
sample_audios2/igbo/20240308165214-101-1076-330019-ebere-na-eri-nri-n-ite.wav ADDED
Binary file (34 kB). View file
 
sample_audios2/igbo/20240323005358-188-3285-1229656-ha-ga-aru-shopin-molu-n-odinih.wav ADDED
Binary file (17.8 kB). View file