|
|
import pandas as pd |
|
|
import audioread |
|
|
from tqdm import tqdm |
|
|
from tqdm.contrib.concurrent import process_map |
|
|
|
|
|
def map_duration(tsv_withdur, tsv_toadd):
    """Copy the columns of one TSV onto another, matching rows by ``name``.

    Both files are read as tab-separated tables. The table from
    ``tsv_toadd`` is merged with the one from ``tsv_withdur`` on the
    ``name`` column. The result keeps only columns that exist in
    ``tsv_withdur``; for columns present in both files the values from
    ``tsv_toadd`` win, because the ``tsv_withdur`` copies receive the
    ``_y`` suffix and are dropped.

    The merged table is written back over ``tsv_toadd`` (tab-separated,
    without the index) and also returned.
    """
    reference = pd.read_csv(tsv_withdur, sep='\t')
    target = pd.read_csv(tsv_toadd, sep='\t')

    merged = target.merge(reference, on=['name'], suffixes=['', '_y'])

    # Everything the reference table does not define is discarded: the
    # '_y' duplicates as well as columns that only exist in the target.
    surplus = merged.columns.difference(reference.columns)
    result = merged.drop(columns=list(surplus))

    result.to_csv(tsv_toadd, sep='\t', index=False)
    return result
|
|
|
|
|
def add_duration(args):
    """Return ``(index, duration)`` for a single audio file.

    Args:
        args: A ``(index, audiopath)`` pair. It is passed as one tuple so
            the function can be used directly as a map worker.

    Returns:
        A tuple ``(index, totalsec)`` where ``totalsec`` is the
        ``duration`` attribute reported by ``audioread`` for the file, or
        ``-1`` when the file could not be opened or read.
    """
    index, audiopath = args
    try:
        with audioread.audio_open(audiopath) as f:
            totalsec = f.duration
    except Exception:
        # Best-effort: any decoding/IO failure is reported through the -1
        # sentinel. ``Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt and SystemExit propagate, so the worker can
        # still be interrupted.
        totalsec = -1
    return (index, totalsec)
|
|
|
|
|
def add_dur2tsv(tsv_path, save_path, *, max_workers=16, chunksize=32):
    """Add a ``duration`` column to a TSV manifest of audio files.

    The manifest at ``tsv_path`` is read as a tab-separated table that
    must contain an ``audio_path`` column. Each path is handed to
    ``add_duration`` through ``process_map``; the resulting durations are
    stored in a new ``duration`` column and the table is written to
    ``save_path`` (tab-separated, without the index).

    Files for which ``add_duration`` returns ``-1`` are reported on stdout
    and keep ``-1`` as their duration.

    Args:
        tsv_path: Path of the input TSV.
        save_path: Path the augmented TSV is written to.
        max_workers: Forwarded to ``process_map``.
        chunksize: Forwarded to ``process_map``.
    """
    df = pd.read_csv(tsv_path, sep='\t')

    # (index, path) pairs; the index travels with the path so each result
    # can be matched back to its row.
    item_list = [
        (row[0], row.audio_path)
        for row in tqdm(df.itertuples(), total=len(df))
    ]

    results = process_map(
        add_duration,
        item_list,
        max_workers=max_workers,
        chunksize=chunksize,
    )

    index2dur = {}
    for index, dur in results:
        if dur == -1:
            bad_wav = df.loc[index, 'audio_path']
            print(f'bad wav:{bad_wav}')
        index2dur[index] = dur

    df['duration'] = df.index.map(index2dur)
    df.to_csv(save_path, sep='\t', index=False)
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: compute durations for the manifest and write the
    # augmented copy next to it.
    source_tsv = '/root/autodl-tmp/liuhuadai/AudioLCM/now.tsv'
    output_tsv = '/root/autodl-tmp/liuhuadai/AudioLCM/now_duration.tsv'
    add_dur2tsv(source_tsv, output_tsv)
|
|
|
|
|
|
|
|
|