Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -22,10 +22,9 @@ auth_token = os.environ.get("TOKEN_FROM_SECRET")
|
|
| 22 |
##########################################
|
| 23 |
from huggingface_hub import snapshot_download
|
| 24 |
snapshot_download(repo_id="Emova-ollm/emova_speech_tokenizer", local_dir='./emova_speech_tokenizer', token=auth_token)
|
| 25 |
-
os.system("cd emova_speech_tokenizer && pip install -e .")
|
| 26 |
|
| 27 |
-
from emova_speech_tokenizer.speech_utils import get_S2U_ckpt_config_path, load_S2U_model, s2u_extract_unit_demo
|
| 28 |
-
from emova_speech_tokenizer.speech_utils import load_condition_centroid, get_U2S_config_checkpoint_file, load_U2S_model, synthesis
|
| 29 |
|
| 30 |
####################
|
| 31 |
# S2U
|
|
@@ -77,6 +76,7 @@ mode2func = dict(
|
|
| 77 |
|
| 78 |
##########################################
|
| 79 |
# LLM part
|
|
|
|
| 80 |
##########################################
|
| 81 |
import torch
|
| 82 |
from transformers import AutoModel, AutoProcessor, TextIteratorStreamer
|
|
@@ -236,7 +236,7 @@ def http_bot(state, temperature, top_p, max_new_tokens, speaker):
|
|
| 236 |
inputs = processor(text=[prompt], images=all_images if len(all_images) > 0 else None, return_tensors="pt")
|
| 237 |
inputs.to(model.device)
|
| 238 |
if len(all_images) > 0:
|
| 239 |
-
inputs['pixel_values'] = inputs['pixel_values'].to(model.dtype)
|
| 240 |
|
| 241 |
# Process hyperparameters
|
| 242 |
temperature = float(pload.get("temperature", 1.0))
|
|
@@ -361,7 +361,7 @@ For an optimal experience, please use desktop computers for this demo, as mobile
|
|
| 361 |
|
| 362 |
learn_more_markdown = ("""
|
| 363 |
## License
|
| 364 |
-
The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/
|
| 365 |
|
| 366 |
## Acknowledgement
|
| 367 |
The service is built upon [LLaVA](https://github.com/haotian-liu/LLaVA/). We thank the authors for open-sourcing the wonderful code.
|
|
|
|
| 22 |
##########################################
|
| 23 |
from huggingface_hub import snapshot_download
|
| 24 |
snapshot_download(repo_id="Emova-ollm/emova_speech_tokenizer", local_dir='./emova_speech_tokenizer', token=auth_token)
|
|
|
|
| 25 |
|
| 26 |
+
from emova_speech_tokenizer.emova_speech_tokenizer.speech_utils import get_S2U_ckpt_config_path, load_S2U_model, s2u_extract_unit_demo
|
| 27 |
+
from emova_speech_tokenizer.emova_speech_tokenizer.speech_utils import load_condition_centroid, get_U2S_config_checkpoint_file, load_U2S_model, synthesis
|
| 28 |
|
| 29 |
####################
|
| 30 |
# S2U
|
|
|
|
| 76 |
|
| 77 |
##########################################
|
| 78 |
# LLM part
|
| 79 |
+
# TODO: 1) change model 2) change arguments
|
| 80 |
##########################################
|
| 81 |
import torch
|
| 82 |
from transformers import AutoModel, AutoProcessor, TextIteratorStreamer
|
|
|
|
| 236 |
inputs = processor(text=[prompt], images=all_images if len(all_images) > 0 else None, return_tensors="pt")
|
| 237 |
inputs.to(model.device)
|
| 238 |
if len(all_images) > 0:
|
| 239 |
+
inputs['pixel_values'] = inputs['pixel_values'].to(model.dtype) # TODO
|
| 240 |
|
| 241 |
# Process hyperparameters
|
| 242 |
temperature = float(pload.get("temperature", 1.0))
|
|
|
|
| 361 |
|
| 362 |
learn_more_markdown = ("""
|
| 363 |
## License
|
| 364 |
+
The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/QwenLM/Qwen/blob/main/LICENSE) of Qwen and [Privacy Practices](https://chrome.google.com/webstore/detail/sharegpt-share-your-chatg/daiacboceoaocpibfodeljbdfacokfjb) of ShareGPT. Please contact us if you find any potential violation.
|
| 365 |
|
| 366 |
## Acknowledgement
|
| 367 |
The service is built upon [LLaVA](https://github.com/haotian-liu/LLaVA/). We thank the authors for open-sourcing the wonderful code.
|