Spaces:

marianna13
/

annotate-audio

Runtime error

App Files Community

marianna13 committed on Feb 25, 2023

Commit

4766c38

1 Parent(s): 2b05ce7

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -29

app.py CHANGED Viewed

@@ -6,20 +6,12 @@ import string
 import pandas as pd
 import os
 import requests
-os.system('python -m spacy download en_core_web_sm')
 nlp = spacy.load("en_core_web_sm")
 nlp.add_pipe('sentencizer')
-def read_gs(sheet_url):
-    s_url = sheet_url.replace('/edit#gid=', '/export?format=csv&gid=')
-    df = pd.read_csv(s_url)
-    return df
 def download_and_save_file(URL, audio_dir):
     headers = {
         'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
@@ -33,45 +25,45 @@ def download_and_save_file(URL, audio_dir):
     file_name = URL.split('/')[-1].split('?')[0]
     audio_path = f'{audio_dir}/{file_name}'
     with open(audio_path, 'wb') as f:
-        f.write(doc.content)
     return audio_path
-def select_samples():
-    df = read_gs('https://docs.google.com/spreadsheets/d/17QG4puJRXN8V5froIv8YrJIMsns0GTt4/edit#gid=1020901598')
-    audio_dir = 'AUDIO'
-    os.makedirs(audio_dir, exist_ok=True)
-    df = df.sample(1)
-    audio_url = df.url.values[0]
-    audio_path = download_and_save_file(audio_url, audio_dir)
-    return audio_path, df['text'].values[0]
 title = '🎵 Annotate audio'
 description = '''Choose a sentence that describes audio the best if there's no such sentence please choose `No audio description`'''
-audio_path, full_text = select_samples()
 full_text = full_text.translate(str.maketrans('', '', string.punctuation))
-sents = [re.sub(r'###audio###\d###', '', s.text) for s in nlp(full_text).sents]
 sents.append('No audio description')
-def audio_demo(text, audio, audio_id):
-  with open('data.json', 'w') as f:
-    data = {
-        'audio':audio_id,
-        'text':text
-    }
-    json.dump(data, f)
     return 'success!'
 iface = gr.Interface(
     audio_demo,
-    inputs=[gr.Dropdown(sents, label='audio description'), gr.Audio(audio_path, type="filepath"), gr.Textbox(value=audio_path, visible=False)],
     outputs=[gr.Textbox(label="output")],
     allow_flagging="never",
     title=title,

 import pandas as pd
 import os
 import requests
+from textwrap import wrap
 nlp = spacy.load("en_core_web_sm")
 nlp.add_pipe('sentencizer')
 def download_and_save_file(URL, audio_dir):
     headers = {
         'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
     file_name = URL.split('/')[-1].split('?')[0]
     audio_path = f'{audio_dir}/{file_name}'
     with open(audio_path, 'wb') as f:
+        f.write(doc.content)
     return audio_path
+credentials = os.environ['CREDENTIALS']
+data = json.loads(credentials, strict=False)
+with open('credentials.json', 'w') as f:
+    json.dump(data, f)
+gc = gspread.service_account(filename='credentials.json')
+sh = gc.open('Annotated CC Audio')
+worksheet = sh.sheet1
+df = pd.DataFrame(worksheet.get_all_records())
+sample_df = df[df['caption']==''].sample(1)
 title = '🎵 Annotate audio'
 description = '''Choose a sentence that describes audio the best if there's no such sentence please choose `No audio description`'''
+audio_dir = 'AUDIO'
+os.makedirs(audio_dir, exist_ok=True)
+audio_id, audio_url, full_text, _ = sample_df.values[0]
+audio_path = download_and_save_file(audio_url, audio_dir)
 full_text = full_text.translate(str.maketrans('', '', string.punctuation))
+sents = ['\n'.join(wrap(re.sub(r'###audio###\d###', '', s.text), width=70) )for s in nlp(full_text).sents]
 sents.append('No audio description')
+def audio_demo(cap, audio, audio_id):
+    df.at[int(audio_id)-1, 'caption'] = cap
+    worksheet.update([df.columns.values.tolist()] + df.values.tolist())
     return 'success!'
 iface = gr.Interface(
     audio_demo,
+    inputs=[gr.Dropdown(sents, label='audio description'), gr.Audio(audio_path, type="filepath"), gr.Textbox(value=audio_id, visible=False)],
     outputs=[gr.Textbox(label="output")],
     allow_flagging="never",
     title=title,