# Demo / AudioLogger.py
# muhammad-abdullah's picture
# initial commit
# 2298c8c
import datetime
import json
import os
import tempfile
import time
import uuid

import numpy as np
import scipy
import scipy.io.wavfile
import torch
class AudioLogger():
    """Save audio clips as WAV files and keep a JSON-lines log of them.

    Each call to ``log_audio`` appends one entry to the in-memory
    ``history`` list and one JSON-encoded line to ``log_file``.
    """

    # Directory (relative to the working directory) that receives WAV files.
    AUDIO_DIR = 'temp'
    # Sample rate used for the WAV file when the caller does not supply one.
    DEFAULT_SAMPLING_RATE = 32000

    def __init__(self, log_file='temp/audio_log.txt'):
        """Create a logger that appends its entries to *log_file*.

        Args:
            log_file: Path of the text file receiving one JSON object per
                line. Its parent directory is created on first write.
        """
        self.log_file = log_file
        self.history = []
        self.conversational_history = []

    def save_audio_to_local(self, audio, sec, sampling_rate=None):
        """Write *audio* to a uniquely named WAV file under ``AUDIO_DIR``.

        Args:
            audio: Samples as a ``torch.Tensor``, a NumPy array, or any
                sequence ``numpy.asarray`` accepts. ``None`` is treated
                like empty audio.
            sec: Unused; kept so existing callers keep working.
            sampling_rate: Sample rate passed to the WAV writer. Defaults
                to ``DEFAULT_SAMPLING_RATE``.

        Returns:
            The relative path of the written file, or ``None`` when there
            is no audio to write.
        """
        if audio is None:
            return None

        if isinstance(audio, torch.Tensor):
            # detach() first: .numpy() raises on tensors that require grad.
            audio = audio.detach().cpu().numpy()
        else:
            audio = np.asarray(audio)

        if audio.size == 0:
            return None

        if sampling_rate is None:
            sampling_rate = self.DEFAULT_SAMPLING_RATE

        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(self.AUDIO_DIR, exist_ok=True)
        # A random hex name replaces the private tempfile helper while
        # keeping the returned path relative, as before.
        filename = os.path.join(self.AUDIO_DIR, uuid.uuid4().hex + '.wav')
        scipy.io.wavfile.write(filename, sampling_rate, audio)
        return filename

    def log_audio(self, task, audio, duration, prompt, generation_flag, music_caption, play_steps_in_s, extend_stride, seed, top_p=0.8, top_k=50, temperature=0.8, do_sample=True, guidance_scale=3, context_audio_path=None):
        """Save *audio* and record one log entry describing how it was made.

        The entry is appended to ``self.history`` and written as a single
        JSON line to ``self.log_file``. When *audio* is empty no file is
        written and the entry's ``audio_path`` is ``None``.

        Args:
            task: Value stored under the ``task`` key.
            audio: Audio samples, forwarded to ``save_audio_to_local``.
            duration: Value stored under ``duration`` (also forwarded as
                the unused ``sec`` argument of ``save_audio_to_local``).
            prompt: User input prompt (see the note in the entry below).
            generation_flag: Value stored under ``generation_flag``.
            music_caption: Value stored under ``music_caption``.
            play_steps_in_s, extend_stride, seed, top_p, top_k, temperature,
            do_sample, guidance_scale: Stored together under
                ``audio_gen_args``.
            context_audio_path: Value stored under ``context_audio_path``.

        Raises:
            TypeError: If any logged value is not JSON serializable.
        """
        filename = self.save_audio_to_local(audio, duration)

        audio_gen_args = {
            "play_steps_in_s": play_steps_in_s,
            "extend_stride": extend_stride,
            "top_p": top_p,
            "top_k": top_k,
            "temperature": temperature,
            "seed": seed,
            "do_sample": do_sample,
            "guidance_scale": guidance_scale,
        }
        log_entry = {
            "task": task,
            "audio_path": filename,
            # User input prompt. Per the original author's note: if the
            # generation flag is false, this prompt was used to generate
            # the music; if it is true, this prompt was sent to M2UGen and
            # the music was generated from the music caption instead.
            "prompt": prompt,
            "generation_flag": generation_flag,
            "music_caption": music_caption,
            "audio_gen_args": audio_gen_args,
            "duration": duration,
            "context_audio_path": context_audio_path,
            "timestamp": datetime.datetime.now().isoformat(),
        }

        # Append to history
        self.history.append(log_entry)

        # The audio directory is only created when a clip is saved, so make
        # sure the log file's own directory exists before appending to it.
        log_dir = os.path.dirname(self.log_file)
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)

        # Append log entry to file
        with open(self.log_file, 'a', encoding='utf-8') as f:
            f.write(json.dumps(log_entry) + '\n')
        print(f'Log entry added for {filename}')

    def get_last_log(self):
        """Return the newest history entry that has an audio file.

        Entries whose ``audio_path`` is falsy (no file was written) are
        skipped.

        Returns:
            The matching log entry dict, or ``None`` if there is none.
        """
        return next(
            (entry for entry in reversed(self.history) if entry["audio_path"]),
            None,
        )