temp
Browse files- .vscode/settings.json +26 -0
- __pycache__/app.cpython-310.pyc +0 -0
- __pycache__/lang_list.cpython-310.pyc +0 -0
- __pycache__/m4t_app.cpython-310.pyc +0 -0
- __pycache__/simuleval_transcoder.cpython-310.pyc +0 -0
- __pycache__/test_pipeline.cpython-310.pyc +0 -0
- requirements.txt +1 -1
- seamless_communication +1 -0
- simuleval_transcoder.py +54 -7
.vscode/settings.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"[python]": {
|
| 3 |
+
"editor.defaultFormatter": "ms-python.python"
|
| 4 |
+
},
|
| 5 |
+
"python.formatting.provider": "none",
|
| 6 |
+
"workbench.colorCustomizations": {
|
| 7 |
+
"activityBar.activeBackground": "#fbed80",
|
| 8 |
+
"activityBar.background": "#fbed80",
|
| 9 |
+
"activityBar.foreground": "#15202b",
|
| 10 |
+
"activityBar.inactiveForeground": "#15202b99",
|
| 11 |
+
"activityBarBadge.background": "#06b9a5",
|
| 12 |
+
"activityBarBadge.foreground": "#15202b",
|
| 13 |
+
"commandCenter.border": "#15202b99",
|
| 14 |
+
"sash.hoverBorder": "#fbed80",
|
| 15 |
+
"statusBar.background": "#f9e64f",
|
| 16 |
+
"statusBar.foreground": "#15202b",
|
| 17 |
+
"statusBarItem.hoverBackground": "#f7df1e",
|
| 18 |
+
"statusBarItem.remoteBackground": "#f9e64f",
|
| 19 |
+
"statusBarItem.remoteForeground": "#15202b",
|
| 20 |
+
"titleBar.activeBackground": "#f9e64f",
|
| 21 |
+
"titleBar.activeForeground": "#15202b",
|
| 22 |
+
"titleBar.inactiveBackground": "#f9e64f99",
|
| 23 |
+
"titleBar.inactiveForeground": "#15202b99"
|
| 24 |
+
},
|
| 25 |
+
"peacock.remoteColor": "#f9e64f"
|
| 26 |
+
}
|
__pycache__/app.cpython-310.pyc
ADDED
|
Binary file (2.57 kB). View file
|
|
|
__pycache__/lang_list.cpython-310.pyc
ADDED
|
Binary file (4.03 kB). View file
|
|
|
__pycache__/m4t_app.cpython-310.pyc
ADDED
|
Binary file (8.44 kB). View file
|
|
|
__pycache__/simuleval_transcoder.cpython-310.pyc
ADDED
|
Binary file (5.17 kB). View file
|
|
|
__pycache__/test_pipeline.cpython-310.pyc
ADDED
|
Binary file (2.56 kB). View file
|
|
|
requirements.txt
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
# fairseq2==0.1.0
|
| 2 |
|
| 3 |
# Temp to skip
|
| 4 |
-
git+https://github.com/mduppes/fairseq2.git@93420c86ba01349ee8f90d7adda439b666b50557
|
| 5 |
# git+https://github.com/facebookresearch/seamless_communication
|
| 6 |
./seamless_communication
|
| 7 |
# comment this out to test fairseq1 first
|
|
|
|
| 1 |
# fairseq2==0.1.0
|
| 2 |
|
| 3 |
# Temp to skip
|
| 4 |
+
# git+https://github.com/mduppes/fairseq2.git@93420c86ba01349ee8f90d7adda439b666b50557
|
| 5 |
# git+https://github.com/facebookresearch/seamless_communication
|
| 6 |
./seamless_communication
|
| 7 |
# comment this out to test fairseq1 first
|
seamless_communication
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Subproject commit 02405dfd0c187d625aa66255ff8c39f98031a091
|
simuleval_transcoder.py
CHANGED
|
@@ -31,12 +31,18 @@ from seamless_communication.models.vocoder import load_vocoder_model, Vocoder
|
|
| 31 |
|
| 32 |
|
| 33 |
|
| 34 |
-
from seamless_communication.models.streaming.agents import (
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
)
|
| 39 |
|
|
|
|
|
|
|
| 40 |
### From test_pipeline
|
| 41 |
import math
|
| 42 |
import soundfile
|
|
@@ -98,12 +104,48 @@ def load_model_for_inference(
|
|
| 98 |
model.eval()
|
| 99 |
return model
|
| 100 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
class SimulevalTranscoder:
|
| 102 |
# def __init__(self, agent, sample_rate, debug, buffer_limit):
|
| 103 |
def __init__(self):
|
| 104 |
-
print("MDUPPES in here", SileroVADAgent, TestTimeWaitKS2TVAD)
|
| 105 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
| 106 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
device = "cpu"
|
| 108 |
print("DEVICE", device)
|
| 109 |
model_name_or_card="seamlessM4T_medium"
|
|
@@ -145,8 +187,10 @@ class SimulevalTranscoder:
|
|
| 145 |
|
| 146 |
pipeline = TestTimeWaitKUnityV1M4T(model, args)
|
| 147 |
system_states = pipeline.build_states()
|
| 148 |
-
print('system states')
|
| 149 |
-
|
|
|
|
|
|
|
| 150 |
input_segment = np.empty(0, dtype=np.int16)
|
| 151 |
segments = []
|
| 152 |
while True:
|
|
@@ -156,6 +200,9 @@ class SimulevalTranscoder:
|
|
| 156 |
output_segment = pipeline.pushpop(speech_segment, system_states)
|
| 157 |
print('pushpop result')
|
| 158 |
print(output_segment)
|
|
|
|
|
|
|
|
|
|
| 159 |
if output_segment.finished:
|
| 160 |
segments.append(input_segment)
|
| 161 |
input_segment = np.empty(0, dtype=np.int16)
|
|
|
|
| 31 |
|
| 32 |
|
| 33 |
|
| 34 |
+
# from seamless_communication.models.streaming.agents import (
|
| 35 |
+
# SileroVADAgent,
|
| 36 |
+
# TestTimeWaitKS2TVAD,
|
| 37 |
+
# TestTimeWaitKUnityV1M4T
|
| 38 |
+
# )
|
| 39 |
+
|
| 40 |
+
from seamless_communication.cli.streaming.agents.tt_waitk_unity_s2t_m4t import (
|
| 41 |
+
TestTimeWaitKUnityS2TM4T,
|
| 42 |
)
|
| 43 |
|
| 44 |
+
from seamless_communication.cli.streaming.dataloader import Fairseq2SpeechToTextDataloader
|
| 45 |
+
|
| 46 |
### From test_pipeline
|
| 47 |
import math
|
| 48 |
import soundfile
|
|
|
|
| 104 |
model.eval()
|
| 105 |
return model
|
| 106 |
|
| 107 |
+
def load_model_fairseq2():
|
| 108 |
+
data_configs = dict(
|
| 109 |
+
dataloader="fairseq2_s2t",
|
| 110 |
+
data_file="/large_experiments/seamless/ust/abinesh/data/s2st50_manifests/50-10/simuleval/dev_mtedx_filt_50-10_debug.tsv",
|
| 111 |
+
)
|
| 112 |
+
|
| 113 |
+
model_configs = dict(
|
| 114 |
+
model_name="seamlessM4T_v2_large",
|
| 115 |
+
device="cuda:0",
|
| 116 |
+
source_segment_size=320,
|
| 117 |
+
waitk_lagging=7,
|
| 118 |
+
fixed_pre_decision_ratio=2,
|
| 119 |
+
init_target_tokens="</s> __eng__",
|
| 120 |
+
max_len_a=0,
|
| 121 |
+
max_len_b=200,
|
| 122 |
+
agent_class="seamless_communication.cli.streaming.agents.tt_waitk_unity_s2t_m4t.TestTimeWaitKUnityS2TM4T",
|
| 123 |
+
task="s2st",
|
| 124 |
+
tgt_lang="eng",
|
| 125 |
+
)
|
| 126 |
+
|
| 127 |
+
eval_configs = dict(
|
| 128 |
+
latency_metrics="StartOffset EndOffset AL",
|
| 129 |
+
output=f"{TestTimeWaitKUnityS2TM4T.__name__}-wait{model_configs['waitk_lagging']}-debug",
|
| 130 |
+
)
|
| 131 |
+
|
| 132 |
+
model = TestTimeWaitKUnityS2TM4T({**data_configs, **model_configs, **eval_configs})
|
| 133 |
+
print("model", model)
|
| 134 |
+
|
| 135 |
+
evaluate(
|
| 136 |
+
TestTimeWaitKUnityS2TM4T, {**data_configs, **model_configs, **eval_configs}
|
| 137 |
+
)
|
| 138 |
+
|
| 139 |
class SimulevalTranscoder:
|
| 140 |
# def __init__(self, agent, sample_rate, debug, buffer_limit):
|
| 141 |
def __init__(self):
|
| 142 |
+
# print("MDUPPES in here", SileroVADAgent, TestTimeWaitKS2TVAD)
|
| 143 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
| 144 |
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
load_model_fairseq2()
|
| 148 |
+
|
| 149 |
device = "cpu"
|
| 150 |
print("DEVICE", device)
|
| 151 |
model_name_or_card="seamlessM4T_medium"
|
|
|
|
| 187 |
|
| 188 |
pipeline = TestTimeWaitKUnityV1M4T(model, args)
|
| 189 |
system_states = pipeline.build_states()
|
| 190 |
+
print('system states:')
|
| 191 |
+
for state in system_states:
|
| 192 |
+
print(state, vars(state))
|
| 193 |
+
|
| 194 |
input_segment = np.empty(0, dtype=np.int16)
|
| 195 |
segments = []
|
| 196 |
while True:
|
|
|
|
| 200 |
output_segment = pipeline.pushpop(speech_segment, system_states)
|
| 201 |
print('pushpop result')
|
| 202 |
print(output_segment)
|
| 203 |
+
print('system states after pushpop:')
|
| 204 |
+
for state in system_states:
|
| 205 |
+
print(state, vars(state))
|
| 206 |
if output_segment.finished:
|
| 207 |
segments.append(input_segment)
|
| 208 |
input_segment = np.empty(0, dtype=np.int16)
|