Spaces:
Sleeping
Sleeping
Commit ·
13989e8
1
Parent(s): 352b475
update
Browse files- examples/tutorials/lora_unsloth/step_2_train_model.py +1 -0
- main.py +82 -10
- project_settings.py +2 -0
- requirements.txt +1 -0
- tabs/chat_template_tab.py +54 -0
- tabs/shell_tab.py +48 -0
examples/tutorials/lora_unsloth/step_2_train_model.py
CHANGED
|
@@ -103,6 +103,7 @@ def main():
|
|
| 103 |
train_dataset = train_dataset.map(
|
| 104 |
format_func,
|
| 105 |
batched=False,
|
|
|
|
| 106 |
)
|
| 107 |
print(train_dataset)
|
| 108 |
|
|
|
|
| 103 |
train_dataset = train_dataset.map(
|
| 104 |
format_func,
|
| 105 |
batched=False,
|
| 106 |
+
remove_columns=train_dataset.column_names,
|
| 107 |
)
|
| 108 |
print(train_dataset)
|
| 109 |
|
main.py
CHANGED
|
@@ -1,16 +1,88 @@
|
|
| 1 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
-
|
| 4 |
-
# 按 双击 ⇧ 在所有地方搜索类、文件、工具窗口、操作和设置。
|
| 5 |
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
|
| 8 |
-
# 在下面的代码行中使用断点来调试脚本。
|
| 9 |
-
print(f'Hi, {name}') # 按 ⌘F8 切换断点。
|
| 10 |
|
|
|
|
|
|
|
| 11 |
|
| 12 |
-
# 按装订区域中的绿色按钮以运行脚本。
|
| 13 |
-
if __name__ == '__main__':
|
| 14 |
-
print_hi('PyCharm')
|
| 15 |
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/python3
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import argparse
|
| 4 |
+
import asyncio
|
| 5 |
+
import logging
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
import platform
|
| 8 |
|
| 9 |
+
import gradio as gr
|
|
|
|
| 10 |
|
| 11 |
+
import log
|
| 12 |
+
from project_settings import environment, project_path, log_directory, time_zone_info
|
| 13 |
|
| 14 |
+
log.setup_size_rotating(log_directory=log_directory, tz_info=time_zone_info)
|
|
|
|
|
|
|
| 15 |
|
| 16 |
+
from tabs.chat_template_tab import get_chat_template_tab
|
| 17 |
+
from tabs.shell_tab import get_shell_tab
|
| 18 |
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
+
logger = logging.getLogger("main")
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def get_args():
    """Parse the command-line options of the application.

    Every path option defaults to a location below ``project_path``
    (rendered with ``as_posix()``); ``--server_port`` defaults to
    ``environment.get("server_port", 7860)``.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``.
    """
    # (flag, default location relative to project_path)
    path_options = (
        ("--porter_tasks_file_dir", "data/porter_tasks"),
        ("--live_recorder_tasks_file", "data/live_recorder_tasks.json"),
        ("--video_download_tasks_file", "data/video_download_tasks.json"),
        ("--youtube_video_upload_tasks_file", "data/youtube_video_upload_tasks.json"),
        ("--bilibili_video_upload_tasks_file", "data/bilibili_video_upload_tasks.json"),
        ("--live_records_dir", "data/live_records"),
    )

    parser = argparse.ArgumentParser()
    for flag, relative_path in path_options:
        parser.add_argument(
            flag,
            default=(project_path / relative_path).as_posix(),
            type=str,
        )
    parser.add_argument(
        "--server_port",
        default=environment.get("server_port", 7860),
        type=int,
    )
    return parser.parse_args()
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def main():
    """Build the Gradio page and serve it.

    Parses the command-line arguments, assembles one ``gr.Blocks`` page that
    holds the chat-template tab and the shell tab, then launches it on
    ``args.server_port``.
    """
    args = get_args()

    # ui
    with gr.Blocks() as blocks:
        gr.Markdown(value="live recording.")
        with gr.Tabs():
            # The tab factories build their components inside the active
            # Tabs context; their return values are not used here.
            get_chat_template_tab()
            get_shell_tab()

    # Windows/macOS bind to loopback only; any other platform binds to all
    # interfaces.
    is_desktop = platform.system() in ("Windows", "Darwin")

    # The page is served at http://<server_name>:<server_port>/.
    blocks.queue().launch(
        # The previous platform-dependent expression yielded False on both
        # branches, so sharing is simply disabled.
        share=False,
        server_name="127.0.0.1" if is_desktop else "0.0.0.0",
        server_port=args.server_port,
    )
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
if __name__ == "__main__":
|
| 88 |
+
main()
|
project_settings.py
CHANGED
|
@@ -9,6 +9,8 @@ from toolbox.os.environment import EnvironmentManager
|
|
| 9 |
project_path = os.path.abspath(os.path.dirname(__file__))
|
| 10 |
project_path = Path(project_path)
|
| 11 |
|
|
|
|
|
|
|
| 12 |
log_directory = project_path / "logs"
|
| 13 |
log_directory.mkdir(parents=True, exist_ok=True)
|
| 14 |
|
|
|
|
| 9 |
project_path = os.path.abspath(os.path.dirname(__file__))
|
| 10 |
project_path = Path(project_path)
|
| 11 |
|
| 12 |
+
time_zone_info = "Asia/Shanghai"
|
| 13 |
+
|
| 14 |
log_directory = project_path / "logs"
|
| 15 |
log_directory.mkdir(parents=True, exist_ok=True)
|
| 16 |
|
requirements.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
gradio
|
tabs/chat_template_tab.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/python3
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import json
|
| 4 |
+
|
| 5 |
+
import gradio as gr
|
| 6 |
+
from transformers import AutoTokenizer
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
# Tokenizers already loaded by run_chat_template, keyed by model name, so a
# repeated click does not call AutoTokenizer.from_pretrained again.
_TOKENIZER_CACHE = {}


def _get_tokenizer(model_name: str):
    """Return the tokenizer for *model_name*, loading it on first use only."""
    tokenizer = _TOKENIZER_CACHE.get(model_name)
    if tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        _TOKENIZER_CACHE[model_name] = tokenizer
    return tokenizer


def run_chat_template(conversation: str, model_name: str, add_generation_prompt: bool = False):
    """Render a JSON-encoded conversation with a model's chat template.

    Args:
        conversation: JSON text that is decoded with ``json.loads`` and handed
            to ``apply_chat_template``.
        model_name: Name passed to ``AutoTokenizer.from_pretrained``.
        add_generation_prompt: Forwarded unchanged to ``apply_chat_template``.

    Returns:
        The value returned by ``apply_chat_template`` with ``tokenize=False``.

    Raises:
        json.JSONDecodeError: If *conversation* is not valid JSON.
    """
    messages = json.loads(conversation)

    tokenizer = _get_tokenizer(model_name)

    return tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=add_generation_prompt,
    )
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def get_chat_template_tab():
    """Create the "chat_template" tab and wire its button to run_chat_template.

    Must be called inside an active Gradio layout context.

    Returns:
        ``locals()`` of this function, i.e. a dict mapping the local variable
        names below to the created objects. Because the names are part of the
        returned value, they must not be renamed and no extra locals should be
        introduced.
    """
    with gr.TabItem(label="chat_template"):
        model_name_choices = ["unsloth/Qwen3-8B-unsloth-bnb-4bit"]
        ct_model_name = gr.Dropdown(
            choices=model_name_choices,
            value=model_name_choices[0],
            label="model_name",
        )
        ct_conversation = gr.Textbox(label="conversation")
        ct_add_generation_prompt = gr.Checkbox(label="add_generation_prompt")
        ct_tokenize = gr.Button(value="tokenize")
        ct_output = gr.Textbox(label="output", max_lines=100)

        # Clicking the button renders the conversation into the output box.
        ct_tokenize.click(
            fn=run_chat_template,
            inputs=[ct_conversation, ct_model_name, ct_add_generation_prompt],
            outputs=[ct_output],
        )

        # One ready-made example: a keyword-extraction dialogue, serialized
        # without ASCII escaping so the text stays readable in the textbox.
        gr.Examples(
            examples=[
                [
                    json.dumps([{"role": "user", "content": "帮我识别出文本中的关键词:\n凉山彝族社会中的\"尔普\"(份子钱)是一种礼物交换形式.对\"尔普\"的研究和分析,可有助于人们理解凉山彝族社会.\"尔普\"本来是维系彝族传统社会宗族内部亲属组织的纽带,由于文化变迁的原因,后来发展出了跨宗族的\"尔普\"新形式,又由于族群互动的原因,还产生了跨越族群的\"尔普\"形式.\"尔普\"形式的变迁是族群互动下的一种文化变迁形式,其动力来源于彝、汉两族的互动关系.彝族社会中\"尔普\"的变迁形式是人类学关于族群互动下的文化变迁理论的鲜活事例."}, {"role": "assistant", "content": "彝族;尔普;礼物交换;族群互动"}], ensure_ascii=False),
                    "unsloth/Qwen3-8B-unsloth-bnb-4bit",
                    True,
                ],
            ],
            inputs=[ct_conversation, ct_model_name, ct_add_generation_prompt],
            outputs=[ct_output],
            fn=run_chat_template,
        )

    return locals()
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
if __name__ == "__main__":
|
| 54 |
+
pass
|
tabs/shell_tab.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/python3
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import gradio as gr
|
| 4 |
+
|
| 5 |
+
from toolbox.os.command import Command
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def shell(cmd: str):
    """Pass *cmd* to ``Command.popen`` and return whatever it returns.

    NOTE(review): *cmd* comes straight from a UI textbox, so this presumably
    executes arbitrary commands on the host -- confirm that the app is only
    reachable by trusted users.
    """
    output = Command.popen(cmd)
    return output
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def get_shell_tab():
    """Create the "shell" tab: a command box, a run button and an output box.

    Must be called inside an active Gradio layout context.

    Returns:
        ``locals()`` of this function, i.e. a dict mapping the local variable
        names below to the created components. The names are part of the
        returned value, so they must stay as they are.
    """
    with gr.TabItem(label="shell"):
        shell_text = gr.Textbox(label="cmd")
        shell_button = gr.Button(value="run")
        shell_output = gr.Textbox(label="output", max_lines=100)

        # Clicking "run" sends the textbox content through shell().
        shell_button.click(
            fn=shell,
            inputs=[shell_text],
            outputs=[shell_output],
        )

        # Ready-made commands; no fn is given to gr.Examples here.
        gr.Examples(
            examples=[
                ["echo \"CPU使用率: $(grep 'cpu ' /proc/stat | awk '{usage=($2+$4)*100/($2+$4+$5)} END {print usage \"%\"}')\""],
                ["echo \"内存使用: $(free -m | awk '/Mem:/ {printf \"%.1f%%\", $3/$2*100}')\""],
                ["echo \"内存总量: $(grep MemTotal /proc/meminfo | awk '{print $2/1024 \" MB\"}')\""],
                ["echo \"可用内存: $(grep MemAvailable /proc/meminfo | awk '{print $2/1024 \" MB\"}')\""],
                ["grep 'less' logs/info.log | tail -n 15"],
                ["ffmpeg -i /home/user/app/data/video/download/video.mp4 -vn -acodec libmp3lame -q:a 2 /home/user/app/data/video/download/audio.mp3"],
            ],
            inputs=[shell_text],
            outputs=[shell_output],
        )

    return locals()
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
if __name__ == "__main__":
|
| 48 |
+
pass
|