yongqiang commited on
Commit
1ed9a31
·
1 Parent(s): 028f9bb

Initialize the repository

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +5 -0
  2. .gitignore +4 -0
  3. README.md +44 -0
  4. assets/demo.png +3 -0
  5. config.py +23 -0
  6. gui.py +65 -0
  7. index/docs.index +0 -0
  8. index/docs.pkl +3 -0
  9. llm_api.py +142 -0
  10. models/Qwen2.5-1.5B-Instruct_axmodel/model.embed_tokens.weight.bfloat16.bin +3 -0
  11. models/Qwen2.5-1.5B-Instruct_axmodel/model.embed_tokens.weight.float32.bin +3 -0
  12. models/Qwen2.5-1.5B-Instruct_axmodel/model.embed_tokens.weight.npy +3 -0
  13. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l0_together.axmodel +3 -0
  14. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l10_together.axmodel +3 -0
  15. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l11_together.axmodel +3 -0
  16. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l12_together.axmodel +3 -0
  17. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l13_together.axmodel +3 -0
  18. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l14_together.axmodel +3 -0
  19. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l15_together.axmodel +3 -0
  20. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l16_together.axmodel +3 -0
  21. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l17_together.axmodel +3 -0
  22. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l18_together.axmodel +3 -0
  23. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l19_together.axmodel +3 -0
  24. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l1_together.axmodel +3 -0
  25. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l20_together.axmodel +3 -0
  26. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l21_together.axmodel +3 -0
  27. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l22_together.axmodel +3 -0
  28. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l23_together.axmodel +3 -0
  29. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l24_together.axmodel +3 -0
  30. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l25_together.axmodel +3 -0
  31. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l26_together.axmodel +3 -0
  32. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l27_together.axmodel +3 -0
  33. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l2_together.axmodel +3 -0
  34. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l3_together.axmodel +3 -0
  35. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l4_together.axmodel +3 -0
  36. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l5_together.axmodel +3 -0
  37. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l6_together.axmodel +3 -0
  38. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l7_together.axmodel +3 -0
  39. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l8_together.axmodel +3 -0
  40. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l9_together.axmodel +3 -0
  41. models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_post.axmodel +3 -0
  42. models/Qwen3-Embedding-0.6B_axmodel/model.embed_tokens.weight.bfloat16.bin +3 -0
  43. models/Qwen3-Embedding-0.6B_axmodel/model.embed_tokens.weight.float32.bin +3 -0
  44. models/Qwen3-Embedding-0.6B_axmodel/model.embed_tokens.weight.npy +3 -0
  45. models/Qwen3-Embedding-0.6B_axmodel/qwen3_p128_l0_together.axmodel +3 -0
  46. models/Qwen3-Embedding-0.6B_axmodel/qwen3_p128_l10_together.axmodel +3 -0
  47. models/Qwen3-Embedding-0.6B_axmodel/qwen3_p128_l11_together.axmodel +3 -0
  48. models/Qwen3-Embedding-0.6B_axmodel/qwen3_p128_l12_together.axmodel +3 -0
  49. models/Qwen3-Embedding-0.6B_axmodel/qwen3_p128_l13_together.axmodel +3 -0
  50. models/Qwen3-Embedding-0.6B_axmodel/qwen3_p128_l14_together.axmodel +3 -0
.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.axmodel filter=lfs diff=lfs merge=lfs -text
37
+ *.jpg filter=lfs diff=lfs merge=lfs -text
38
+ *.png filter=lfs diff=lfs merge=lfs -text
39
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
40
+ examples/red-panda.mp4 filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ __pycache__
2
+ build-output/
3
+ tmp/
4
+ *.safetensors
README.md CHANGED
@@ -1,3 +1,47 @@
1
  ---
2
  license: mit
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: mit
3
  ---
4
+
5
+ # RAG.AXERA DEMO
6
+
7
+ ![rag_demo](assets/demo.png)
8
+
9
+ ## 项目说明
10
+
11
+ ```sh
12
+ (hf) ➜ rag.axera git:(main) ✗ tree -L 2
13
+ .
14
+ ├── assets
15
+ │   └── demo.png
16
+ ├── config.py # 配置 axmodel, tokenizer 文件路径
17
+ ├── data
18
+ ├── gui.py # RAG 交互式程序
19
+ ├── index # 文档编码向量索引保存位置
20
+ │   ├── docs.index
21
+ │   └── docs.pkl
22
+ ├── llm_api.py # llm 主程序
23
+ ├── models # axmodel 模型存储位置
24
+ │   ├── Qwen2.5-1.5B-Instruct_axmodel
25
+ │   └── Qwen3-Embedding-0.6B_axmodel
26
+ ├── pdf_sample # 示例 pdf 文件
27
+ │   └── introduction.pdf
28
+ ├── rag_engine.py # 文档向量编码程序
29
+ ├── README.md
30
+ ├── requirements.txt
31
+ ├── tokenizer
32
+ │   ├── Qwen2.5-1.5B-Instruct
33
+ │   └── Qwen3-Embedding-0.6B
34
+ └── utils
35
+ └── infer_func.py
36
+
37
+ 11 directories, 11 files
38
+ ```
39
+
40
+ ## 运行
41
+
42
+ 在 `AXCL` 机器或 `AX650` 开发板上启动两个终端界面, 分别运行下面的命令:
43
+
44
+ ```sh
45
+ python3 llm_api.py # 在 AX650 或 AXCL 开发板启动 llm 服务
46
+ python3 gui.py # 启动交互式界面
47
+ ```
assets/demo.png ADDED

Git LFS Details

  • SHA256: 4fe87b369f0d4f91b433f737027f120cbbd7ad8811b46d01221070ef595d4c63
  • Pointer size: 131 Bytes
  • Size of remote file: 360 kB
config.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# config.py
"""Central configuration: model/tokenizer paths, API endpoints and index file
locations.

Every value can be overridden through an environment variable (optionally
loaded from a .env file via python-dotenv).
"""
import os
from dotenv import load_dotenv

load_dotenv()

# models and paths
LLM_HF_MODEL = os.getenv("LLM_HF_MODEL", "./tokenizer/Qwen2.5-1.5B-Instruct")  # only this path needs changing to match your local layout
LLM_AX_MODEL = os.getenv("LLM_AX_MODEL", "./models/Qwen2.5-1.5B-Instruct_axmodel")
EMBED_HF_MODEL = os.getenv("EMBED_HF_MODEL", "./tokenizer/Qwen3-Embedding-0.6B")
EMBED_AX_MODEL = os.getenv("EMBED_AX_MODEL", "./models/Qwen3-Embedding-0.6B_axmodel")

# API URL
LLM_API_PORT = int(os.getenv("LLM_API_PORT", "8000"))
LLM_API_URL = os.getenv("LLM_API_URL", f"http://127.0.0.1:{LLM_API_PORT}/generate")  # path must match the route defined in llm_api.py
PORT = int(os.getenv("PORT", "7860"))  # Gradio UI port

# Index paths
INDEX_DIR = os.getenv("INDEX_DIR", "index")
INDEX_FILE = os.path.join(INDEX_DIR, "docs.index")     # document vector index
EMBEDDINGS_FILE = os.path.join(INDEX_DIR, "docs.pkl")  # presumably the pickled doc store — see rag_engine

os.makedirs(INDEX_DIR, exist_ok=True)  # ensure the index directory exists at import time
gui.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # gui.py
2
+ import gradio as gr
3
+ from rag_engine import ask_question, build_index, stream_answer
4
+ from config import PORT
5
+ import time
6
+
7
# NOTE(review): appears unused — chat state is passed through the Gradio
# chatbot component instead; candidate for removal.
chat_history = []
8
+
9
def handle_upload(file):
    """Build the retrieval index for an uploaded document.

    Returns a ``(status_message, textbox_value)`` pair; the second element
    is always the empty string so the message box is cleared.
    """
    if file is None:
        return "❌ 请上传文件", ""
    try:
        # file.name is the local path of the Gradio upload.
        status = build_index(file.name)
    except Exception as e:
        status = f"❌ 构建索引失败:{str(e)}"
    return status, ""
17
+
18
def handle_chat(message, history):
    """Stream an answer for *message* into the Gradio chat history.

    Yields ``(textbox_value, history)`` pairs so Gradio can update the UI
    incrementally; the textbox value is always "" to clear the input.
    """
    history = history or []
    if not message.strip():
        # BUG FIX: this function is a generator (it contains `yield`), so a
        # plain `return value` puts the value on StopIteration and Gradio
        # never sees it. Yield the unchanged state instead.
        yield "", history
        return
    try:
        # Streamed response: append a placeholder turn, then fill it in
        # token by token.
        history.append((message, ""))
        full_response = ""

        # Consume the streaming generator from the RAG engine.
        for token in stream_answer(message):
            full_response += token
            history[-1] = (message, full_response)
            yield "", history
            time.sleep(0.02)  # tiny delay for smoother rendering

        # Brief pause once the stream is done.
        time.sleep(0.1)
        yield "", history

    except Exception as e:
        # BUG FIX: must yield (not return) so the error is actually shown.
        history.append((message, f"⚠️ 出错了:{str(e)}"))
        yield "", history
44
+
45
# --- Gradio front-end -------------------------------------------------------
with gr.Blocks(title="RAG 文档问答系统") as demo:
    gr.Markdown("## 🤖 AXERA RAG 文档问答\n请上传 PDF 或 TXT 文件并提问")

    with gr.Row():
        # Left column: document upload + index building.
        with gr.Column(scale=1):
            doc_file = gr.File(label="📄 上传文件", file_types=[".pdf", ".txt"])
            build_button = gr.Button("📥 上传并构建索引")
            build_status = gr.Textbox(label="", interactive=False)

        # Right column: chat window and question input.
        with gr.Column(scale=2):
            chat_window = gr.Chatbot(height=400, label="🧠 问答对话")
            with gr.Row():
                user_box = gr.Textbox(placeholder="请输入你的问题,按 Shift + Enter 发送", show_label=False, lines=2)
                submit_button = gr.Button("🚀 发送")

    # Wire events: upload builds the index; send button and textbox submit
    # both stream the answer into the chat window.
    build_button.click(fn=handle_upload, inputs=[doc_file], outputs=[build_status, user_box])
    submit_button.click(fn=handle_chat, inputs=[user_box, chat_window], outputs=[user_box, chat_window])
    user_box.submit(fn=handle_chat, inputs=[user_box, chat_window], outputs=[user_box, chat_window])

# Enable the request queue and launch the UI.
demo.queue().launch(server_port=PORT)
index/docs.index ADDED
Binary file (24.6 kB). View file
 
index/docs.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad92dcae43938a9ca8afbfff7c9cf6c671a2290e4839f0199a6f834eefdfceac
3
+ size 5705
llm_api.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # llm_api.py
2
+ from fastapi import FastAPI, HTTPException
3
+ from fastapi.responses import StreamingResponse
4
+ from pydantic import BaseModel
5
+ from typing import Optional
6
+ import uvicorn
7
+ import numpy as np
8
+ import os
9
+ import torch
10
+ from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
11
+ from config import LLM_HF_MODEL, LLM_AX_MODEL, LLM_API_PORT
12
+
13
+
14
# FastAPI application plus device selection for the (commented-out) torch path.
app = FastAPI(title="Fast-API", description="本地推理接口")
device = "cuda" if torch.cuda.is_available() else "cpu"

"""
axengine 相关
"""
# axengine-related imports: bfloat16 cast for prefill data and the on-device
# inference wrapper.
from ml_dtypes import bfloat16
from utils.infer_func import InferManager

# Module-level globals, deliberately left unset here; populated lazily by
# init_model() at server startup.
tokenizer = None
imer = None
embeds = None
27
+
28
def init_model():
    """Load tokenizer, axengine runner and embedding table into module globals.

    Idempotent: once ``tokenizer`` is set, further calls are no-ops.
    """
    global tokenizer, imer, embeds
    if tokenizer is not None:
        return  # already initialised
    cfg = AutoConfig.from_pretrained(LLM_HF_MODEL)
    imer = InferManager(cfg, LLM_AX_MODEL, model_type="qwen2")
    embeds = np.load(os.path.join(LLM_AX_MODEL, "model.embed_tokens.weight.npy"))
    # Tokenizer comes from the HF-format directory next to the axmodel files.
    tokenizer = AutoTokenizer.from_pretrained(LLM_HF_MODEL, trust_remote_code=True)
    print("✅ 模型加载完成。")
37
+
38
# FastAPI startup hook: load the model once when the server boots.
# NOTE(review): @app.on_event is deprecated in recent FastAPI releases in
# favour of lifespan handlers — works today, worth migrating eventually.
@app.on_event("startup")
async def startup_event():
    init_model()
42
+
43
class GenRequest(BaseModel):
    """Request body for POST /generate."""
    prompt: str                         # user prompt; wrapped in a chat template server-side
    max_tokens: Optional[int] = 1024    # NOTE(review): unused — only referenced in commented-out code
    temperature: Optional[float] = 0.6  # NOTE(review): unused — only referenced in commented-out code
    top_p: Optional[float] = 0.9        # NOTE(review): unused — only referenced in commented-out code
48
+
49
class GenResponse(BaseModel):
    """Declared response model for /generate.

    NOTE(review): the endpoint actually returns a StreamingResponse (SSE),
    so this model is never serialised in practice.
    """
    text: str  # full generated text
51
+
52
@app.post("/generate", response_model=GenResponse)
def generate_text(req: GenRequest):
    """Generate an answer for ``req.prompt`` and stream it back as SSE.

    Despite ``response_model=GenResponse``, the handler returns a
    ``StreamingResponse`` whose body is a sequence of
    ``data: {"token": "..."}`` events. Any failure is converted to a 500.
    """
    try:
        # --- legacy torch-based generation path, kept for reference -------
        # input_ids = tokenizer(req.prompt, return_tensors="pt").input_ids.to(device)

        # with torch.no_grad():
        #     output_ids = model.generate(
        #         input_ids=input_ids,
        #         max_new_tokens=req.max_tokens,
        #         temperature=req.temperature,
        #         top_p=req.top_p,
        #         # do_sample=True,
        #         eos_token_id=tokenizer.eos_token_id
        #     )

        # response_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
        # -------------------------------------------------------------------
        messages = [
            {"role": "system", "content": "你的名字叫做 [AXERA-RAG 助手]. 你是一个高效、精准的问答助手. 你可以根据上下文内容, 回答用户提出的问题, 回答时不要提及多余的、无用的内容, 且仅输出你的回答."},
            {"role": "user", "content": req.prompt}
        ]
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        model_inputs = tokenizer([text], return_tensors="pt").to(device)

        """
        axengine 框架模型推理
        """
        # Look up input embeddings from the pre-exported table, cast to
        # bfloat16 as expected by the axengine prefill.
        input_ids = model_inputs['input_ids']
        inputs_embeds = np.take(embeds, input_ids.cpu().numpy(), axis=0)
        prefill_data = inputs_embeds
        prefill_data = prefill_data.astype(bfloat16)
        token_ids = input_ids[0].cpu().numpy().tolist()
        generated_text = ""

        def generate_stream():
            # Generator producing SSE events, one per decoded token.
            nonlocal token_ids, generated_text
            token_ids = imer.prefill(tokenizer, token_ids, prefill_data[0], slice_len=128)
            generated_text += tokenizer.decode(token_ids[-1], skip_special_tokens=True)

            # response_text = imer.decode(tokenizer, token_ids, embeds, slice_len=128)
            # Strip the prompt prefix, keeping only the generated part:
            # generated_text = response_text[len(req.prompt):].strip()
            # generated_text = response_text
            # return GenResponse(text=generated_text)

            # Streaming output control: the token produced by prefill is held
            # back and prepended to the first decoded token.
            prefill_word = tokenizer.decode(token_ids[-1], skip_special_tokens=True)
            prefill_word = prefill_word.strip().replace("\n", "\\n").replace("\"", "\\\"")

            seq_len = len(token_ids) - 1
            prefill_len = 128
            for step_idx in range(imer.max_seq_len):
                # Skip steps that were already covered by the prefill phase.
                if prefill_len > 0 and step_idx < seq_idx if False else prefill_len > 0 and step_idx < seq_len:
                    continue
                token_ids, next_token_id = imer.decode_next_token(tokenizer, token_ids, embeds, slice_len=128, step_idx=step_idx)
                # NOTE(review): `next_token_id > seq_len` compares a vocab id
                # against a sequence position — looks wrong; presumably meant
                # `step_idx > seq_len`. Confirm against InferManager semantics.
                if next_token_id == tokenizer.eos_token_id and next_token_id > seq_len:
                    break
                try:
                    if next_token_id is not None:
                        word = tokenizer.decode([next_token_id], skip_special_tokens=True)
                        generated_text += word
                        if prefill_word is not None:
                            word = prefill_word + word
                            prefill_word = None
                        # Emit in an SSE-friendly JSON shape; escape newlines
                        # and double quotes for the front-end.
                        word = word.strip().replace("\n", "\\n").replace("\"", "\\\"")
                        # import pdb; pdb.set_trace()
                        yield f"data: {{\"token\": \"{word}\"}}\n\n"
                except Exception as e:
                    print(f"Error decoding token {next_token_id}: {e}")

        return StreamingResponse(
            generate_stream(),
            media_type="text/event-stream",  # must be SSE for the streaming client
            headers={
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
                "X-Accel-Buffering": "no"  # disable Nginx buffering
            }
        )

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
139
+
140
+
141
if __name__ == "__main__":
    # Serve the LLM API on all interfaces; reload disabled.
    uvicorn.run(app, host="0.0.0.0", port=LLM_API_PORT, reload=False)
models/Qwen2.5-1.5B-Instruct_axmodel/model.embed_tokens.weight.bfloat16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2c594e5f910978ef413824340261a6055c5bb905dcfefceed9d30dd2b80637e
3
+ size 466747392
models/Qwen2.5-1.5B-Instruct_axmodel/model.embed_tokens.weight.float32.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2517b46ffa436067fc5e2bd7e191107c2c5c9f29892358e8639957b0057a287
3
+ size 933494784
models/Qwen2.5-1.5B-Instruct_axmodel/model.embed_tokens.weight.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcec2b0923f51df5891df10e715e3204e3bb039f26780112ade2f1b7da997bef
3
+ size 933494912
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l0_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63ef37adbe925889dc0be44e9ba35aefec5eb8f1824f3c785510765f1a1bc6fe
3
+ size 67100526
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l10_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe006a7dd1096a5d8c68ef9d70ed49f9f1ed4769f122080a735da8d04f025e02
3
+ size 67100526
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l11_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e350949ca5683b3084ae4241bdd1fddcaebe5ef8e70c0d8aef339a3b5e693c2
3
+ size 67100526
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l12_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39501bd3523287d5fa67973abf11a91f4a67c79ae096c0d31c7e07fe01427690
3
+ size 67100526
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l13_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:627aa4354f2c7327bf2c053d2a5f6acecde55a406ebd0713cd7c7feffa6855c0
3
+ size 67100526
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l14_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8d04ec0cb7ba6170c18c6fb6ccdec8586d3568717d93de562ac31b032fd34d5
3
+ size 67100526
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l15_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d0ab59dc0bf214354b6ed6b9b155d5ff4fb3e7cc10975cc0c1652e39a660f7a
3
+ size 67100526
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l16_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd4e7800e81bbed49d15c4354ad763a699af9f8ec3413dd9d1cc9a6b49d9c6fb
3
+ size 67100526
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l17_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b8d5ec8f6dafda2b90ede8a630a98e865c3b6012995c17e6a6b5b606f269237
3
+ size 67100526
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l18_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79b37891f1710e559ba39604b83badad68c4ec971fb6883733e963afff250c1b
3
+ size 67100526
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l19_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3863cbeb6191f43dfc7e3d31c3bfe4bf4dccba3ea9769003c52754ec2fb4be12
3
+ size 67100526
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l1_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4755cfff48189833c5062bc07683e551443424b985ed1575b8db4a2297ca0894
3
+ size 67100526
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l20_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f34c1d1589a2f2545f73eb03dc231e2fd434e26fb7015b0ae4e78cb9ab6329c2
3
+ size 67100526
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l21_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64df1cce0340b34a586363b5d86158289501895836667e084ca172c81ae5dbcf
3
+ size 67100526
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l22_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c60464b0a790a9e39934a35f7bdc4b1948903cb74c93c3321313a4f9d2e9b6b
3
+ size 67100526
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l23_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a71d4d9806fc91f4eeddc7c336074f26fc10a436163537c1f6fb5f92ab0c13c2
3
+ size 67100526
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l24_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:678177517e5efaa54688a2debe1402c83a056285062c00d119ec0f53f80b22ff
3
+ size 67100526
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l25_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea4077e32a81594ed2eab85edca2579aaa07d6610c38bae722d50519db54f7c5
3
+ size 67100526
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l26_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e23d6bb592bb43cc68f2cee5d95be7f97401be13f8bb882bfe5ca23d5f7a5a1
3
+ size 67100526
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l27_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cd266fdd498697b600309ffd89c3f0e43d8dbb7aaf3f4f0413520c9500e7ab6
3
+ size 67100526
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l2_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:beae1c79a0455b1d66747e0078a2057d299360558a4a4f7c81ea4a9062a8fa1b
3
+ size 67102542
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l3_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:716ea68fe92de87bb162f8033bfbe57448b4eca36ce25af0c5b74078240190ee
3
+ size 67100526
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l4_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47d97c2421ccbb5c64e50337a1f7b4615f5f51ad2c50a16f2f9afef951e33056
3
+ size 67100526
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l5_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eda5ef5b2eae8c023fa47f13ffc1757c921b0cecfcfee2d3dedf9f3ea8079d73
3
+ size 67100526
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l6_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:693e656bfc8c54beb76a194be5839ffe6535691b5fc3c74e6a7ad8ea291e9c89
3
+ size 67100526
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l7_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d842213daf2da92c1f4df7d981233f5faa64344f4ddec8009c9874a54fa650f
3
+ size 67100526
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l8_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:957387916555920f33d3207db940e1918f64c7c389c31f808d324d2e3d9d0d95
3
+ size 67100526
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_p128_l9_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3da1f60bc8b971ef8ef6098cf11e4b44cbc9c819cc6cfeb3b646941e50d13a9
3
+ size 67100526
models/Qwen2.5-1.5B-Instruct_axmodel/qwen2_post.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1d25088c38ccf35bd82c7d871878ab1cbe8512e00130ca871eb5e9601768a94
3
+ size 254449571
models/Qwen3-Embedding-0.6B_axmodel/model.embed_tokens.weight.bfloat16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a55b140d86852835bd18d8200222a9f302340730f0670eb7e23a4895e5489033
3
+ size 310618112
models/Qwen3-Embedding-0.6B_axmodel/model.embed_tokens.weight.float32.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7a027c062fb61cd505e046bc832345be155e1eb2fab629675cebe7973646c85
3
+ size 621236224
models/Qwen3-Embedding-0.6B_axmodel/model.embed_tokens.weight.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bbdc47aee1b4cdb97a42a255306d4e0a1cb52f797bfdc32f94469eb0cd0744e
3
+ size 621236352
models/Qwen3-Embedding-0.6B_axmodel/qwen3_p128_l0_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:025e17a92f3f19d58a36ef119294598073c4ccdc794aa9d4a2845a99b0c6b53d
3
+ size 28019747
models/Qwen3-Embedding-0.6B_axmodel/qwen3_p128_l10_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95d809d6cc6889517b1aad7a4e62e51ffbf75580dda5ceafb667dbd5ac10ba6e
3
+ size 28019779
models/Qwen3-Embedding-0.6B_axmodel/qwen3_p128_l11_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fb730805877eceea4aa037694bc2abb830fa960c6666d24b976d7ae35c058d0
3
+ size 28018723
models/Qwen3-Embedding-0.6B_axmodel/qwen3_p128_l12_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eef7266dff6af522a0a63095067a1c7823a9a1213e7bd498bcdb97f2814523ba
3
+ size 28019427
models/Qwen3-Embedding-0.6B_axmodel/qwen3_p128_l13_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9322be28dad0729b006238364297659594e2193a97a37556fb06f63d3fec9fa0
3
+ size 28019459
models/Qwen3-Embedding-0.6B_axmodel/qwen3_p128_l14_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5a8f239e4a4e793e0bdf86226c08c2089f5199118bd38d2be4957f9b7023dda
3
+ size 28018723