tanbushi committed on
Commit
702fae5
·
1 Parent(s): 8fafc6b
Files changed (5) hide show
  1. app.py +89 -8
  2. requirements.txt +1 -0
  3. utils/chat_request.py +12 -0
  4. utils/chat_response.py +115 -0
  5. utils/model.py +105 -0
app.py CHANGED
@@ -1,14 +1,95 @@
1
- from fastapi import FastAPI
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  # 初始化 FastAPI 应用
4
  app = FastAPI(title="HF-Model-Runner API", version="0.0.1")
5
 
6
- model_name = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  @app.get("/")
9
- def greet_json():
10
- return {
11
- "message": "HF-Model-Runner API is running!",
12
- "model": model_name,
13
- "status": "ready"
14
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ import os
3
+ from dotenv import load_dotenv
4
+
5
+ # 导入 utils 模块
6
+ from utils.chat_request import ChatRequest
7
+ from utils.chat_response import create_chat_response, ChatResponse
8
+ from utils.model import check_model, initialize_pipeline, download_model, DownloadRequest
9
+
10
# Module-level state shared by the request handlers in this file.
# All three start out empty and are filled in once a model has been loaded.
tokenizer = None   # tokenizer returned together with `pipe`
pipe = None        # active text-generation pipeline
model_name = None  # name of the model the handlers consider loaded
14
 
15
  # 初始化 FastAPI 应用
16
  app = FastAPI(title="HF-Model-Runner API", version="0.0.1")
17
 
18
@app.on_event("startup")
async def startup_event():
    """Initialize the default model pipeline when the application starts.

    Reads ``DEFAULT_MODEL_NAME`` from the environment (after loading ``.env``)
    and stores the resulting pipeline and tokenizer in the module globals.
    Any failure is only reported on stdout; the application still starts.
    """
    global pipe, tokenizer, model_name

    # Pick up settings from a local .env file, if there is one.
    load_dotenv()

    # Fall back to a built-in default when the variable is not configured.
    default_model = os.getenv("DEFAULT_MODEL_NAME", "unsloth/functiongemma-270m-it")
    print(f"应用启动，正在初始化模型: {default_model}")

    try:
        pipe, tokenizer, ready = initialize_pipeline(default_model)
        if not ready:
            print(f"✗ 模型 {default_model} 初始化失败")
        else:
            model_name = default_model
            print(f"✓ 模型 {default_model} 初始化成功")
    except Exception as startup_error:
        print(f"✗ 启动时模型初始化失败: {startup_error}")
41
 
42
  @app.get("/")
43
async def read_root():
    """Return a static welcome payload that points callers at ``/docs``."""
    welcome = "Welcome to HF-Model-Runner API! Visit /docs for API documentation."
    return {"message": welcome}
45
+
46
@app.post("/download")
async def download_model_endpoint(request: DownloadRequest):
    """Download a Hugging Face model and, if possible, make it the active one.

    Args:
        request: Body carrying the ``model`` identifier to download.

    Returns:
        A dict with ``status``, ``message`` and ``loaded``. When the model was
        also initialized it carries ``current_model``; otherwise it carries an
        ``error`` string and the previously active model stays in place.

    Raises:
        HTTPException: 500 when the download fails or an unexpected error
            occurs. An ``HTTPException`` raised further down (for example by
            the cache check) is propagated with its own status code.
    """
    global pipe, tokenizer, model_name

    try:
        success, message = download_model(request.model)
        if not success:
            raise HTTPException(status_code=500, detail=message)

        # Load into locals first so a failed initialization cannot wipe out
        # the pipeline that is currently serving requests.
        new_pipe, new_tokenizer, init_success = initialize_pipeline(request.model)
    except HTTPException:
        # Keep the original status code and detail instead of re-wrapping
        # them into a second, less informative 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    if not init_success:
        return {
            "status": "success",
            "message": message,
            "loaded": False,
            "error": "模型下载成功但初始化失败"
        }

    # Commit all three globals together so they always describe one model.
    pipe, tokenizer, model_name = new_pipe, new_tokenizer, request.model
    return {
        "status": "success",
        "message": message,
        "loaded": True,
        "current_model": model_name
    }
77
+
78
@app.post("/v1/chat/completions", response_model=ChatResponse)
async def chat_completions(request: ChatRequest):
    """OpenAI-compatible chat completion endpoint.

    When the requested model differs from the one currently loaded, the
    pipeline is re-initialized for the requested model before generating.

    Args:
        request: Chat request with the model name, messages and options.

    Returns:
        The ``ChatResponse`` built by ``create_chat_response``.

    Raises:
        HTTPException: 500 when the requested model cannot be initialized or
            generation fails. An ``HTTPException`` raised while checking the
            model cache is propagated unchanged.
    """
    global pipe, tokenizer, model_name

    if request.model != model_name:
        # Initialize into locals: on failure the previously loaded pipeline
        # must stay usable, otherwise later requests for the old model would
        # find `pipe` set to None while `model_name` still matches.
        new_pipe, new_tokenizer, success = initialize_pipeline(request.model)
        if not success:
            raise HTTPException(status_code=500, detail="模型初始化失败")
        pipe, tokenizer, model_name = new_pipe, new_tokenizer, request.model

    try:
        return create_chat_response(request, pipe, tokenizer)
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
requirements.txt CHANGED
@@ -5,3 +5,4 @@ huggingface_hub
5
  torch
6
  accelerate
7
  python-multipart
 
 
5
  torch
6
  accelerate
7
  python-multipart
8
+ python-dotenv
utils/chat_request.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from typing import List, Optional, Dict, Any
3
+
4
+
5
class ChatRequest(BaseModel):
    """Request body for the chat completion endpoint (OpenAI-style fields)."""

    # Name of the model to answer with.
    model: Optional[str] = "gpt-3.5-turbo"
    # Conversation so far, one dict per message.
    messages: List[Dict[str, Any]]
    # Sampling options; every one of them is optional.
    temperature: Optional[float] = 1.0
    # Upper bound on generated tokens; None means the caller set no limit.
    max_tokens: Optional[int] = None
    top_p: Optional[float] = 1.0
    frequency_penalty: Optional[float] = 0.0
    presence_penalty: Optional[float] = 0.0
utils/chat_response.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from typing import List, Optional, Dict, Any
3
+ import time
4
+ import re
5
+
6
+ # 聊天响应模型
7
class ChatChoice(BaseModel):
    """One generated choice inside a chat response."""

    # Position of this choice in the ``choices`` list.
    index: int
    # The generated message as a string-to-string mapping.
    message: Dict[str, str]
    # Why generation ended for this choice.
    finish_reason: str
11
+
12
+
13
class ChatUsage(BaseModel):
    """Token accounting reported alongside a chat response."""

    # Tokens counted for the request messages.
    prompt_tokens: int
    # Tokens counted for the generated text.
    completion_tokens: int
    # Combined count for the request and the generated text.
    total_tokens: int
17
+
18
+
19
class ChatResponse(BaseModel):
    """Top-level response body of the chat completion endpoint."""

    # Identifier assigned to this completion.
    id: str
    # Object type tag.
    object: str
    # Creation time as an integer timestamp.
    created: int
    # Model name reported back to the caller.
    model: str
    # Generated choices.
    choices: List[ChatChoice]
    # Token usage figures.
    usage: ChatUsage
26
+
27
+
28
def convert_json_format(input_data):
    """Convert text-generation pipeline output into a ``generations`` payload.

    Args:
        input_data: Iterable of pipeline result dicts. ``generated_text`` may
            be a list of ``{"role", "content"}`` messages (chat input) or a
            plain string; a missing key is treated as an empty reply.

    Returns:
        ``{"generations": [[{"text": ..., "generationInfo": {...}}], ...]}``
        with one inner list per input item.
    """
    # Drop <think>...</think> reasoning blocks and the whitespace after them.
    # The previous pattern was just r'\s*', which removed every whitespace
    # character from the reply and glued all words together.
    think_block = re.compile(r'<think>.*?</think>\s*', flags=re.DOTALL)

    output_generations = []
    for item in input_data:
        generated = item.get('generated_text', [])

        if isinstance(generated, str):
            # Non-chat pipelines return the generated text directly.
            assistant_content = generated
        else:
            # For chat input the new reply is appended after the prompt
            # messages, so search from the end; taking the first assistant
            # message would return a stale turn in multi-turn conversations.
            assistant_content = next(
                (
                    message.get('content') or ''
                    for message in reversed(generated)
                    if message.get('role') == 'assistant'
                ),
                '',
            )

        clean_content = think_block.sub('', assistant_content).strip()

        output_generations.append([
            {
                "text": clean_content,
                "generationInfo": {
                    "finish_reason": "stop"
                }
            }
        ])

    return {"generations": output_generations}
53
+
54
+
55
def create_chat_response(request: Any, pipe=None, tokenizer=None) -> ChatResponse:
    """Build a chat response, generating the reply with ``pipe`` when available.

    Args:
        request: Object exposing ``messages``, ``max_tokens`` and ``model``.
        pipe: Callable generation pipeline. When ``None`` a fixed placeholder
            reply is returned instead of generated text.
        tokenizer: Optional tokenizer used for token counts. Without it the
            counts are estimated as one token per four characters.

    Returns:
        A ``ChatResponse`` with a single choice and usage figures.
    """
    import uuid  # local import: only needed for the response id

    if pipe is None:
        # No pipeline yet: answer with a placeholder rather than failing.
        completion_text = "模型正在初始化中，请稍后重试..."
    else:
        # Forward the limit only when the caller set one. An explicit
        # max_new_tokens=None may override the library's or the model's own
        # default generation length instead of leaving it in place.
        generate_kwargs = {}
        if request.max_tokens is not None:
            generate_kwargs["max_new_tokens"] = request.max_tokens

        result = pipe(request.messages, **generate_kwargs)
        converted_result = convert_json_format(result)
        completion_text = converted_result["generations"][0][0]["text"]

    response_message = {
        "role": "assistant",
        "content": completion_text
    }

    # Message content may be missing, None or a non-string value;
    # normalize it so token counting cannot fail on it.
    prompt_texts = []
    for msg in request.messages:
        content = msg.get("content")
        if content is None:
            content = ""
        elif not isinstance(content, str):
            content = str(content)
        prompt_texts.append(content)

    if tokenizer is not None:
        prompt_tokens = sum(len(tokenizer.encode(text)) for text in prompt_texts)
        completion_tokens = len(tokenizer.encode(completion_text))
    else:
        # Rough estimate: about four characters per token.
        prompt_tokens = sum(len(text) for text in prompt_texts) // 4
        completion_tokens = len(completion_text) // 4

    return ChatResponse(
        # A random id avoids collisions between responses created within the
        # same second, which a timestamp-based id cannot guarantee.
        id=f"chatcmpl-{uuid.uuid4().hex}",
        object="chat.completion",
        created=int(time.time()),
        model=request.model,
        choices=[
            ChatChoice(
                index=0,
                message=response_message,
                finish_reason="stop"
            )
        ],
        usage=ChatUsage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=prompt_tokens + completion_tokens
        )
    )
utils/model.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ from pathlib import Path
4
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
5
+ from huggingface_hub import login
6
+ from fastapi import HTTPException
7
+ from pydantic import BaseModel
8
+
9
+
10
class DownloadRequest(BaseModel):
    """Request body naming the model that should be downloaded."""

    # Model identifier passed on to the download helper.
    model: str
12
+
13
+
14
def check_model(model_name):
    """Check that a model is present in the local cache and loadable.

    Args:
        model_name: Model name taken from the incoming request.

    Returns:
        ``(model_name, cache_dir, success)``. ``success`` is ``False`` when
        the cached files exist but the tokenizer cannot be loaded from them.

    Raises:
        HTTPException: 404 when no non-empty snapshot directory exists for
            the model in the cache.
    """
    cache_dir = "./my_model_cache"

    # Cache layout used here: <cache_dir>/models--<org>--<name>/snapshots/<revision>/
    model_path = Path(cache_dir) / f"models--{model_name.replace('/', '--')}"
    snapshot_path = model_path / "snapshots"

    # is_dir() also covers the case where "snapshots" exists but is not a
    # directory, where iterdir() would raise instead of reporting "missing".
    if not (snapshot_path.is_dir() and any(snapshot_path.iterdir())):
        raise HTTPException(status_code=404, detail=f"模型 `{model_name}` 不存在，请先下载")

    print(f"✓ 模型 {model_name} 已存在于缓存中")
    try:
        # Loaded only to validate the cached files; the result is not kept.
        AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
    except Exception as e:
        print(f"⚠ 加载现有模型失败: {e}")
        return model_name, cache_dir, False

    return model_name, cache_dir, True
36
+
37
+
38
def download_model(model_name):
    """Fetch the tokenizer and causal-LM weights of a model into the local cache.

    Args:
        model_name: Name of the model to download.

    Returns:
        ``(success, message)`` where ``message`` describes the outcome.
    """
    cache_dir = "./my_model_cache"

    print(f"开始下载模型: {model_name}")
    print(f"缓存目录: {cache_dir}")

    # Logging in is optional; it is only attempted when a token is configured.
    hf_token = os.getenv("HUGGINGFACE_TOKEN")
    if not hf_token:
        print("ℹ 未设置 HUGGINGFACE_TOKEN - 仅使用公开模型")
    else:
        try:
            print("登录 Hugging Face...")
            login(token=hf_token)
            print("✓ HuggingFace 登录成功！")
        except Exception as login_error:
            # A failed login is not fatal: carry on without authentication.
            print(f"⚠ 登录失败: {login_error}")
            print("继续使用公开模型")

    try:
        # Tokenizer first ...
        print("正在下载 tokenizer...")
        AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
        print("✓ Tokenizer 下载成功！")

        # ... then the model itself.
        print("正在下载模型...")
        AutoModelForCausalLM.from_pretrained(model_name, cache_dir=cache_dir)
        print("✓ 模型下载成功！")

        print(f"✓ 模型和 tokenizer 已成功下载到 {cache_dir}")
    except Exception as download_error:
        print(f"✗ 下载模型时出错: {download_error}")
        return False, f"下载失败: {str(download_error)}"

    return True, f"模型 {model_name} 下载成功"
79
+
80
+
81
def initialize_pipeline(model_name):
    """Build a text-generation pipeline for a model from the local cache.

    Args:
        model_name: Model name taken from the incoming request.

    Returns:
        ``(pipe, tokenizer, success)``; ``(None, None, False)`` when the
        cache check or the pipeline construction fails.

    Raises:
        HTTPException: Propagated from ``check_model`` when the model is not
            present in the cache.
    """
    model_name, cache_dir, success = check_model(model_name)

    if not success:
        return None, None, False

    try:
        tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)

        print(f"使用 {model_name} 初始化 pipeline...")
        # Load the weights from the same cache directory the download step
        # wrote to and hand the model object to the pipeline. Passing only
        # the model name made the pipeline resolve the weights through the
        # default cache, ignoring the files already in `cache_dir`.
        model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=cache_dir)
        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
        print("✓ Pipeline 初始化成功！")

        return pipe, tokenizer, True

    except Exception as e:
        print(f"✗ Pipeline 初始化失败: {e}")
        return None, None, False