han145 committed on
Commit
e86b33d
·
verified ·
1 Parent(s): 5727b08

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -234
app.py CHANGED
@@ -1,236 +1,33 @@
1
- import os
2
- import time
3
- import logging
4
- from fastapi import FastAPI, Request, HTTPException, Depends, status
5
- from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
6
- from fastapi.responses import JSONResponse
7
- from transformers import AutoTokenizer, AutoModelForCausalLM
8
- import torch
9
- import gc
10
-
11
- # 日志配置
12
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
13
- logger = logging.getLogger(__name__)
14
-
15
- # 全局变量
16
- model = None
17
- tokenizer = None
18
-
19
- # 配置
20
- MODEL_NAME = "Qwen/Qwen1.5-0.5B-Chat"
21
- MAX_TOKENS = 512
22
- DEVICE = "cpu" # 强制使用 CPU
23
-
24
- # API 密钥配置
25
- API_KEYS = os.getenv("API_KEYS", "your-secret-key-1,your-secret-key-2").split(",")
26
- API_AUTH_ENABLED = os.getenv("API_AUTH_ENABLED", "true").lower() == "true"
27
-
28
- # Bearer 认证
29
- security = HTTPBearer()
30
-
31
- def verify_api_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
32
- """验证 API 密钥"""
33
- if not API_AUTH_ENABLED:
34
- return True
35
- if credentials.scheme != "Bearer":
36
- raise HTTPException(
37
- status_code=status.HTTP_401_UNAUTHORIZED,
38
- detail="Invalid authentication scheme. Use 'Bearer' token",
39
- headers={"WWW-Authenticate": "Bearer"},
40
- )
41
- api_key = credentials.credentials
42
- if api_key not in API_KEYS:
43
- raise HTTPException(
44
- status_code=status.HTTP_401_UNAUTHORIZED,
45
- detail="Invalid API key",
46
- headers={"WWW-Authenticate": "Bearer"},
47
- )
48
- return True
49
-
50
- def load_model():
51
- """加载模型"""
52
- global model, tokenizer
53
- try:
54
- logger.info(f"开始加载模型: {MODEL_NAME}")
55
- tokenizer = AutoTokenizer.from_pretrained(
56
- MODEL_NAME,
57
- trust_remote_code=True
58
- )
59
- if tokenizer.pad_token is None:
60
- tokenizer.pad_token = tokenizer.eos_token
61
-
62
- model = AutoModelForCausalLM.from_pretrained(
63
- MODEL_NAME,
64
- torch_dtype=torch.float16,
65
- device_map=None,
66
- low_cpu_mem_usage=True,
67
- trust_remote_code=True
68
- )
69
- model = model.to(DEVICE)
70
- model.eval()
71
- logger.info("模型加载成功")
72
- return True
73
- except Exception as e:
74
- logger.error(f"模型加载失败: {e}")
75
- return False
76
-
77
- def apply_chat_template(messages):
78
- """将 messages 转换为 Qwen 的对话格式"""
79
- text = ""
80
- for msg in messages:
81
- role = msg.get("role", "").lower()
82
- content = msg.get("content", "")
83
-
84
- # 处理 content 可能是 list 的情况(兼容多模态格式)
85
- if isinstance(content, list):
86
- text_parts = []
87
- for item in content:
88
- if isinstance(item, dict):
89
- if item.get("type") == "text":
90
- text_parts.append(str(item.get("text", "")))
91
- elif isinstance(item, str):
92
- text_parts.append(item)
93
- content_str = " ".join([p for p in text_parts if p]).strip()
94
- else:
95
- content_str = str(content).strip()
96
-
97
- if not content_str:
98
- continue
99
-
100
- if role == "system":
101
- text += f"<|im_start|>system\n{content_str}<|im_end|>\n"
102
- elif role == "user":
103
- text += f"<|im_start|>user\n{content_str}<|im_end|>\n"
104
- elif role == "assistant":
105
- text += f"<|im_start|>assistant\n{content_str}<|im_end|>\n"
106
-
107
- text += "<|im_start|>assistant\n"
108
- return text
109
-
110
- def generate_chat_response(messages, max_tokens=512, temperature=0.7):
111
- """生成回复"""
112
- if model is None or tokenizer is None:
113
- return {"error": "模型未加载"}
114
-
115
- try:
116
- prompt = apply_chat_template(messages)
117
- logger.info(f"输入文本类型: {type(prompt)}, 长度: {len(prompt)}")
118
-
119
- inputs = tokenizer(
120
- [prompt],
121
- return_tensors="pt",
122
- truncation=True,
123
- max_length=2048, # 改小,防止上下文过长影响生成
124
- padding=True
125
- )
126
- inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
127
-
128
- with torch.no_grad():
129
- outputs = model.generate(
130
- **inputs,
131
- max_new_tokens=384, # 暂时写死为 384,确保有足够生成空间
132
- do_sample=True,
133
- temperature=temperature,
134
- top_p=0.85,
135
- repetition_penalty=1.05,
136
- pad_token_id=tokenizer.eos_token_id,
137
- eos_token_id=tokenizer.eos_token_id,
138
- )
139
-
140
-
141
- return {"text": response}
142
-
143
- except Exception as e:
144
- logger.error(f"生成失败: {str(e)}", exc_info=True)
145
- return {"error": str(e)}
146
-
147
- # FastAPI 应用
148
- app = FastAPI(
149
- title="Qwen OpenAI-compatible API",
150
- version="1.0",
151
- description="仅提供 /v1/chat/completions 端点"
152
  )
153
 
154
- @app.on_event("startup")
155
- async def startup_event():
156
- if load_model():
157
- logger.info("服务启动完成")
158
- else:
159
- logger.error("模型加载失败,服务可能无法正常工作")
160
-
161
- # 健康检查
162
- @app.get("/health")
163
- async def health_check():
164
- return {
165
- "status": "healthy" if model is not None else "model loading failed",
166
- "model_loaded": model is not None,
167
- "timestamp": int(time.time())
168
- }
169
-
170
- # 根路径
171
- @app.get("/")
172
- async def root():
173
- return {"message": "Qwen API 服务运行中,仅支持 /v1/chat/completions"}
174
-
175
- # 核心端点
176
- @app.post("/v1/chat/completions")
177
- async def create_chat_completion(
178
- request: Request,
179
- auth_valid: bool = Depends(verify_api_key)
180
- ):
181
- try:
182
- data = await request.json()
183
- messages = data.get("messages", [])
184
- max_tokens = data.get("max_tokens", MAX_TOKENS)
185
- temperature = data.get("temperature", 0.7)
186
-
187
- logger.info(f"收到请求: messages_count={len(messages)}")
188
-
189
- if not messages or not isinstance(messages, list):
190
- raise ValueError("messages 必须是非空列表")
191
-
192
- result = generate_chat_response(messages, max_tokens, temperature)
193
-
194
- if "error" in result:
195
- raise RuntimeError(result["error"])
196
-
197
- response_data = {
198
- "id": f"chatcmpl-{int(time.time()*1000)}",
199
- "object": "chat.completion",
200
- "created": int(time.time()),
201
- "model": MODEL_NAME,
202
- "choices": [
203
- {
204
- "index": 0,
205
- "message": {
206
- "role": "assistant",
207
- "content": result["text"]
208
- },
209
- "finish_reason": "stop"
210
- }
211
- ]
212
- }
213
-
214
- return response_data
215
-
216
- except Exception as e:
217
- logger.error(f"Chat Completions 错误: {str(e)}", exc_info=True)
218
- return JSONResponse(
219
- status_code=500,
220
- content={
221
- "error": {
222
- "message": str(e),
223
- "type": "internal_server_error"
224
- }
225
- }
226
- )
227
-
228
- if __name__ == "__main__":
229
- import uvicorn
230
- uvicorn.run(
231
- app,
232
- host="0.0.0.0",
233
- port=7860,
234
- workers=1,
235
- log_level="info"
236
- )
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+
4
+ # 1. 加载模型
5
+ # 这里选用 Qwen2.5-0.5B-Instruct,参数量小,免费 CPU 也能跑得动
6
+ model_id = "Qwen/Qwen2.5-0.5B-Instruct"
7
+ pipe = pipeline("text-generation", model=model_id)
8
+
9
+ # 2. 定义对话生成函数
10
+ def generate_response(prompt):
11
+ # 构建适合 Instruct 模型的 Prompt 格式
12
+ messages = [
13
+ {"role": "system", "content": "你是一个乐于助人的 AI 助手。"},
14
+ {"role": "user", "content": prompt},
15
+ ]
16
+
17
+ # 生成回答
18
+ result = pipe(messages, max_new_tokens=256, truncation=True)
19
+
20
+ # 提取并返回生成的文本
21
+ return result[0]['generated_text'][-1]['content']
22
+
23
+ # 3. 构建 Gradio 可视化界面
24
+ iface = gr.Interface(
25
+ fn=generate_response,
26
+ inputs=gr.Textbox(lines=5, placeholder="在这里输入你的问题,例如:给我写一首关于春天的诗..."),
27
+ outputs=gr.Textbox(label="AI 助手的回答"),
28
+ title="我的第一个免费大模型应用 🚀",
29
+ description=f"当前运行模型: {model_id}。注意:由于运行在免费 CPU 上,生成速度可能需要几秒到十几秒不等,请耐心等待。"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  )
31
 
32
+ # 4. 启动应用
33
+ iface.launch()