jiang1002 commited on
Commit
8f97a31
·
verified ·
1 Parent(s): a83510c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -28
app.py CHANGED
@@ -10,35 +10,98 @@ import traceback
10
  logging.basicConfig(level=logging.INFO)
11
  logger = logging.getLogger(__name__)
12
 
13
- # FastAPI
14
- app = FastAPI()
 
 
15
 
16
- # 获取 token
17
  HF_TOKEN = os.getenv("HF_TOKEN")
18
  if not HF_TOKEN:
19
- logger.error("❌ HF_TOKEN 环境变量未设置!")
 
 
 
 
 
 
20
 
21
- # 选择一个兼容的模型(改这里!)
22
- MODEL_ID = "meta-llama/Llama-3.2-3B-Instruct" # 轻量级,免费,支持中文
23
- # MODEL_ID = "microsoft/Phi-3.5-mini-instruct" # 备选1
24
- # MODEL_ID = "Qwen/Qwen2.5-7B-Instruct" # 备选2
25
 
26
- # 创建客户端(用 auto 模式)
27
- client = InferenceClient(provider="auto", token=HF_TOKEN)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
- # 测试模型是否可用(启动时验证)
30
  try:
31
- logger.info(f"正在测试模型 {MODEL_ID}...")
32
- test_response = client.chat.completions.create(
33
- model=MODEL_ID,
 
34
  messages=[{"role": "user", "content": "你好"}],
35
- max_tokens=10
36
  )
37
- logger.info(f"✅ 模型可用!响应: {test_response.choices[0].message.content}")
38
  except Exception as e:
39
- logger.error(f"❌ 模型测试失败: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
- # --- API 接口 ---
 
 
 
42
  @app.post("/generate")
43
  async def generate(request: Request):
44
  try:
@@ -46,6 +109,7 @@ async def generate(request: Request):
46
  prompt = data.get("text", "")
47
  messages = data.get("messages", [])
48
 
 
49
  if messages:
50
  response = client.chat.completions.create(
51
  model=MODEL_ID,
@@ -53,22 +117,25 @@ async def generate(request: Request):
53
  )
54
  result = response.choices[0].message.content
55
  else:
56
- response = client.chat.completions.create(
 
 
57
  model=MODEL_ID,
58
- messages=[{"role": "user", "content": prompt}]
 
59
  )
60
- result = response.choices[0].message.content
61
 
62
  return {"success": True, "result": result}
63
  except Exception as e:
64
  logger.error(f"API 调用失败: {str(e)}")
65
  return {"success": False, "error": str(e)}
66
 
67
- # --- Gradio 聊天界面 ---
68
  def chat_func(message, history):
69
  """Gradio 聊天函数"""
70
  try:
71
- # 转换历史记录
72
  messages = []
73
  for human, assistant in history:
74
  messages.append({"role": "user", "content": human})
@@ -86,20 +153,20 @@ def chat_func(message, history):
86
  return response.choices[0].message.content
87
  except Exception as e:
88
  logger.error(f"聊天失败: {str(e)}")
89
- logger.error(traceback.format_exc())
90
- return f"调用失败: {str(e)}"
91
 
92
  # 创建 Gradio 界面
93
  demo = gr.ChatInterface(
94
  fn=chat_func,
95
- title="AI 聊天助手 (使用 Inference Providers)",
96
- description=f"后台模型: {MODEL_ID}"
97
  )
98
 
99
  # 挂载 Gradio
100
  app = gr.mount_gradio_app(app, demo, path="/")
101
 
102
- # 健康检查
103
  @app.get("/health")
104
  async def health():
105
  return {"status": "ok", "model": MODEL_ID}
 
10
  logging.basicConfig(level=logging.INFO)
11
  logger = logging.getLogger(__name__)
12
 
13
+ # ===== 测试代码开始 =====
14
+ print("="*50)
15
+ print("🔍 开始测试模型调用")
16
+ print("="*50)
17
 
 
18
  HF_TOKEN = os.getenv("HF_TOKEN")
19
  if not HF_TOKEN:
20
+ print("❌ 错误: HF_TOKEN 环境变量未设置!")
21
+ else:
22
+ print(f"✅ HF_TOKEN 已设置 (长度: {len(HF_TOKEN)})")
23
+ if HF_TOKEN.startswith("hf_"):
24
+ print("✅ HF_TOKEN 格式正确")
25
+ else:
26
+ print("⚠️ 警告: HF_TOKEN 格式可能不正确,应以 hf_ 开头")
27
 
28
+ # 测试你的模型
29
+ model_id = "jiang1002/chatglm-6b-adgen"
30
+ print(f"\n📊 正在测试模型 '{model_id}'...")
 
31
 
32
+ # 方法1:测试 Hugging Face 免费推理(不指定 provider)
33
+ try:
34
+ print("\n🔄 测试1: 使用 Hugging Face 免费推理...")
35
+ client1 = InferenceClient(token=HF_TOKEN)
36
+ response1 = client1.text_generation(
37
+ "你好",
38
+ model=model_id,
39
+ max_new_tokens=20
40
+ )
41
+ print(f"✅ 免费推理成功! 响应: {response1[:50]}...")
42
+ except Exception as e:
43
+ print(f"❌ 免费推理失败: {str(e)}")
44
+
45
+ # 方法2:测试 auto provider(自动选择)
46
+ try:
47
+ print("\n🔄 测试2: 使用 auto provider...")
48
+ client2 = InferenceClient(provider="auto", token=HF_TOKEN)
49
+ response2 = client2.chat.completions.create(
50
+ model=model_id,
51
+ messages=[{"role": "user", "content": "你好"}],
52
+ max_tokens=20
53
+ )
54
+ print(f"✅ auto provider 成功! 响应: {response2.choices[0].message.content[:50]}...")
55
+ except Exception as e:
56
+ print(f"❌ auto provider 失败: {str(e)}")
57
+
58
+ # 方法3:测试 Groq(如果配置了)
59
+ try:
60
+ print("\n🔄 测试3: 使用 Groq...")
61
+ client3 = InferenceClient(provider="groq", token=HF_TOKEN)
62
+ response3 = client3.chat.completions.create(
63
+ model=model_id,
64
+ messages=[{"role": "user", "content": "你好"}],
65
+ max_tokens=20
66
+ )
67
+ print(f"✅ Groq 成功! 响应: {response3.choices[0].message.content[:50]}...")
68
+ except Exception as e:
69
+ print(f"❌ Groq 失败: {str(e)}")
70
 
71
+ # 方法4:测试 Together AI(如果配置了)
72
  try:
73
+ print("\n🔄 测试4: 使用 Together AI...")
74
+ client4 = InferenceClient(provider="together-ai", token=HF_TOKEN)
75
+ response4 = client4.chat.completions.create(
76
+ model=model_id,
77
  messages=[{"role": "user", "content": "你好"}],
78
+ max_tokens=20
79
  )
80
+ print(f"✅ Together AI 成功! 响应: {response4.choices[0].message.content[:50]}...")
81
  except Exception as e:
82
+ print(f"❌ Together AI 失败: {str(e)}")
83
+
84
+ print("\n" + "="*50)
85
+ print("🔍 测试结束,继续启动应用...")
86
+ print("="*50)
87
+ # ===== 测试代码结束 =====
88
+
89
+ # 初始化 FastAPI
90
+ app = FastAPI()
91
+
92
+ # 从环境变量获取 Hugging Face Token
93
+ HF_TOKEN = os.getenv("HF_TOKEN")
94
+ if not HF_TOKEN:
95
+ logger.warning("⚠️ 未设置 HF_TOKEN 环境变量,API 调用可能失败")
96
+
97
+ # 初始化 InferenceClient
98
+ # 这里用 provider="auto" 让系统自动选择可用提供商
99
+ client = InferenceClient(provider="auto", token=HF_TOKEN)
100
 
101
+ # 你的模型名称
102
+ MODEL_ID = "jiang1002/chatglm-6b-adgen" # 或者换成其他公开模型
103
+
104
+ # --- 1. API 接口 ---
105
  @app.post("/generate")
106
  async def generate(request: Request):
107
  try:
 
109
  prompt = data.get("text", "")
110
  messages = data.get("messages", [])
111
 
112
+ # 如果提供了完整的 messages 格式,就用它
113
  if messages:
114
  response = client.chat.completions.create(
115
  model=MODEL_ID,
 
117
  )
118
  result = response.choices[0].message.content
119
  else:
120
+ # 否则用简单的 prompt 格式
121
+ response = client.text_generation(
122
+ prompt,
123
  model=MODEL_ID,
124
+ max_new_tokens=512,
125
+ temperature=0.7
126
  )
127
+ result = response
128
 
129
  return {"success": True, "result": result}
130
  except Exception as e:
131
  logger.error(f"API 调用失败: {str(e)}")
132
  return {"success": False, "error": str(e)}
133
 
134
+ # --- 2. Gradio 聊天界面 ---
135
  def chat_func(message, history):
136
  """Gradio 聊天函数"""
137
  try:
138
+ # 历史记录转换为 messages 格式
139
  messages = []
140
  for human, assistant in history:
141
  messages.append({"role": "user", "content": human})
 
153
  return response.choices[0].message.content
154
  except Exception as e:
155
  logger.error(f"聊天失败: {str(e)}")
156
+ logger.error(f"详细错误: {traceback.format_exc()}") # 添加这行
157
+ return f"调用失败: {str(e)}\n\n{traceback.format_exc()}"
158
 
159
  # 创建 Gradio 界面
160
  demo = gr.ChatInterface(
161
  fn=chat_func,
162
+ title="ChatGLM 广告生成助手 (使用 Inference Providers)",
163
+ description="后台使用 Hugging Face Inference Providers,无需本地 GPU"
164
  )
165
 
166
  # 挂载 Gradio
167
  app = gr.mount_gradio_app(app, demo, path="/")
168
 
169
+ # 添加健康检查端点
170
  @app.get("/health")
171
  async def health():
172
  return {"status": "ok", "model": MODEL_ID}