biptv3 / code /superpoint_ops /run(2).py
YYYYYYUUU's picture
Add core reproduction code (binarization layers, PTv3, superpoint ops, min-repro pack)
7b95dc2 verified
Raw
History Blame Contribute Delete
4.22 kB
from __future__ import print_function
import volcenginesdkcore
import volcenginesdkmlplatform20240701
from volcenginesdkcore.rest import ApiException
import datetime
import os
# Exclude the Volcengine domains from any configured proxy.
os.environ['no_proxy'] = 'volces.com,volcengineapi.com'


def require_env(name):
    """Return the non-empty value of environment variable *name*.

    Args:
        name: Name of the environment variable to read.

    Returns:
        The variable's value as a string.

    Raises:
        RuntimeError: If the variable is unset or empty.
    """
    value = os.environ.get(name)
    if not value:
        raise RuntimeError(
            f"Environment variable {name} must be set; credentials are "
            "not embedded in this script."
        )
    return value


if __name__ == '__main__':
    configuration = volcenginesdkcore.Configuration()
    # SECURITY: the access key / secret key used to be hard-coded here.
    # They are now taken from the environment so they never live in the
    # repository. Any key pair that was previously committed should be
    # rotated.
    configuration.ak = require_env("VOLC_ACCESSKEY")
    configuration.sk = require_env("VOLC_SECRETKEY")
    configuration.region = "cn-beijing2"

    # One timestamp is shared by the log path and the job name.
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    # NOTE(review): log_file_eval is not referenced by anything visible in
    # this script (the command below does not redirect to it) -- confirm
    # whether output was meant to be written there.
    log_file_eval = f"/map-vepfs/haozhe/BiDIT/qqllm/dection_{timestamp}.log"

    # Shell command handed to the job as its entry point. The lines that
    # start with '#' are shell comments (earlier experiments kept for
    # reference); only the final cd / source / bash chain is executed.
    # The text is passed through verbatim, so it is not indented.
    bash_command = '''
# cd /map-vepfs/haozhe/BiDIT/qqllm/HBLLM &&
# source activate /map-vepfs/haozhe/BiDIT/qqllm/dataset_env &&
# sh /map-vepfs/haozhe/BiDIT/qqllm/HBLLM/run.sh
# cd /map-vepfs/haozhe/BiDIT/qqllm/BiLLM &&
# source activate /map-vepfs/haozhe/BiDIT/qqllm/dataset_env &&
# bash /map-vepfs/haozhe/BiDIT/qqllm/OBR/FlatQuant/run_Qwen2.5-7B/run_obr_flatquant_qwen25_7b_w4a4kv4_s50.sh
# cd /map-vepfs/haozhe/BiDIT/qqllm &&
# source activate /map-vepfs/haozhe/BiDIT/qqllm/dataset_env &&
# bash /map-vepfs/haozhe/BiDIT/qqllm/eval_scripts/eval_standard_qwen7b.sh
cd /map-vepfs/haozhe/BiDIT/qqllm/WANDA_gptq_shift &&
source activate /map-vepfs/haozhe/BiDIT/qqllm/dataset_env &&
bash /map-vepfs/haozhe/BiDIT/qqllm/WANDA_gptq_shift/QuaRot/run_final_0406retry_llama31_70b_quarot_no_rot_sparsegpt_gptq_baseline_w4a16kv16_c4_s50_ns128_asym.sh
'''

    # Job parameters read by create_job().
    card_type = "ml.pni2l.7xlarge"
    jobname = f'TR-DQ_{timestamp}'
    resource_queue_id = "q-20241024095431-86z6j"
    vepfs_id = "vepfs-cnbj26c39866e9ec1"
    vepfs_host_path = f"/mnt/{vepfs_id}"

    # Register the configuration as the SDK default before creating the
    # API client that create_job() uses.
    volcenginesdkcore.Configuration.set_default(configuration)
    api_instance = volcenginesdkmlplatform20240701.MLPLATFORM20240701Api()
def create_job():
    """Submit one single-replica "worker" job and return its ID.

    The request is assembled from the script-level settings (card_type,
    resource_queue_id, bash_command, vepfs_id, vepfs_host_path, jobname)
    and sent through the script-level api_instance.

    Returns:
        The ID reported in the create-job response, or None when the SDK
        raises ApiException (the error is printed, not re-raised).
    """
    sdk = volcenginesdkmlplatform20240701

    # Storage: mount the vePFS file system at /map-vepfs inside the job.
    storage_config = sdk.StorageConfigForCreateJobInput(
        storages=[
            sdk.StorageForCreateJobInput(
                config=sdk.ConfigForCreateJobInput(
                    vepfs=sdk.VepfsForCreateJobInput(
                        host_path=vepfs_host_path,
                        id=vepfs_id,
                    ),
                ),
                mount_path="/map-vepfs",
                type="Vepfs",
            ),
        ],
    )

    # Runtime: prebuilt image plus the shell command to run.
    runtime_config = sdk.RuntimeConfigForCreateJobInput(
        command=bash_command,
        framework="PyTorch",
        image=sdk.ImageForCreateJobInput(
            type="Prebuild",
            url="vemlp-cn-beijing2.cr.volces.com/preset-images/ray:2.12.0-cuda-121-py310",
        ),
    )

    # Resources: a single "worker" role with one replica on the preset
    # instance type, scheduled through the configured resource queue.
    worker_role = sdk.RoleForCreateJobInput(
        name="worker",
        replicas=1,
        resource=sdk.ResourceForCreateJobInput(
            instance_type_id=card_type,
            type="Preset",
            zone_id="cn-beijing2-a",
        ),
    )
    resource_config = sdk.ResourceConfigForCreateJobInput(
        priority=2,
        resource_queue_id=resource_queue_id,
        roles=[worker_role],
    )

    request = sdk.CreateJobRequest(
        dry_run=False,
        name=jobname,
        resource_config=resource_config,
        runtime_config=runtime_config,
        storage_config=storage_config,
    )

    try:
        response = api_instance.create_job(request)
    except ApiException as e:
        print(f"❌ 提交失败: {e}")
        return None

    job_id = response.id
    print("✅ 作业提交成功")
    print(f" 作业ID: {job_id}")
    print(f" 作业名称: {jobname}")
    print(" 等待5-10分钟启动...")
    return job_id
# Submit the job when run as a script; created_job_id holds the value
# returned by create_job() (None if the submission failed).
if __name__ == '__main__':
    created_job_id = create_job()