jiang1002 commited on
Commit
a83510c
·
verified ·
1 Parent(s): 6982706

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -95
app.py CHANGED
@@ -10,98 +10,35 @@ import traceback
10
  logging.basicConfig(level=logging.INFO)
11
  logger = logging.getLogger(__name__)
12
 
13
- # ===== 测试代码开=====
14
- print("="*50)
15
- print("🔍 开始测试模型调用")
16
- print("="*50)
17
 
 
18
  HF_TOKEN = os.getenv("HF_TOKEN")
19
  if not HF_TOKEN:
20
- print("❌ 错误: HF_TOKEN 环境变量未设置!")
21
- else:
22
- print(f"✅ HF_TOKEN 已设置 (长度: {len(HF_TOKEN)})")
23
- if HF_TOKEN.startswith("hf_"):
24
- print("✅ HF_TOKEN 格式正确")
25
- else:
26
- print("⚠️ 警告: HF_TOKEN 格式可能不正确,应以 hf_ 开头")
27
-
28
- # 测试你的模型
29
- model_id = "jiang1002/chatglm-6b-adgen"
30
- print(f"\n📊 正在测试模型 '{model_id}'...")
31
-
32
- # 方法1:测试 Hugging Face 免费推理(不指定 provider)
33
- try:
34
- print("\n🔄 测试1: 使用 Hugging Face 免费推理...")
35
- client1 = InferenceClient(token=HF_TOKEN)
36
- response1 = client1.text_generation(
37
- "你好",
38
- model=model_id,
39
- max_new_tokens=20
40
- )
41
- print(f"✅ 免费推理成功! 响应: {response1[:50]}...")
42
- except Exception as e:
43
- print(f"❌ 免费推理失败: {str(e)}")
44
 
45
- # 方法2:测试 auto provider(自动选择)
46
- try:
47
- print("\n🔄 测试2: 使用 auto provider...")
48
- client2 = InferenceClient(provider="auto", token=HF_TOKEN)
49
- response2 = client2.chat.completions.create(
50
- model=model_id,
51
- messages=[{"role": "user", "content": "你好"}],
52
- max_tokens=20
53
- )
54
- print(f"✅ auto provider 成功! 响应: {response2.choices[0].message.content[:50]}...")
55
- except Exception as e:
56
- print(f"❌ auto provider 失败: {str(e)}")
57
 
58
- # 方法3:测试 Groq如果配置了
59
- try:
60
- print("\n🔄 测试3: 使用 Groq...")
61
- client3 = InferenceClient(provider="groq", token=HF_TOKEN)
62
- response3 = client3.chat.completions.create(
63
- model=model_id,
64
- messages=[{"role": "user", "content": "你好"}],
65
- max_tokens=20
66
- )
67
- print(f"✅ Groq 成功! 响应: {response3.choices[0].message.content[:50]}...")
68
- except Exception as e:
69
- print(f"❌ Groq 失败: {str(e)}")
70
 
71
- # 方法4:测试 Together AI如果配置了
72
  try:
73
- print("\n🔄 测试4: 使用 Together AI...")
74
- client4 = InferenceClient(provider="together-ai", token=HF_TOKEN)
75
- response4 = client4.chat.completions.create(
76
- model=model_id,
77
  messages=[{"role": "user", "content": "你好"}],
78
- max_tokens=20
79
  )
80
- print(f"✅ Together AI 成功! 响应: {response4.choices[0].message.content[:50]}...")
81
  except Exception as e:
82
- print(f"❌ Together AI 失败: {str(e)}")
83
-
84
- print("\n" + "="*50)
85
- print("🔍 测试结束,继续启动应用...")
86
- print("="*50)
87
- # ===== 测试代码结束 =====
88
-
89
- # 初始化 FastAPI
90
- app = FastAPI()
91
-
92
- # 从环境变量获取 Hugging Face Token
93
- HF_TOKEN = os.getenv("HF_TOKEN")
94
- if not HF_TOKEN:
95
- logger.warning("⚠️ 未设置 HF_TOKEN 环境变量,API 调用可能失败")
96
-
97
- # 初始化 InferenceClient
98
- # 这里用 provider="auto" 让系统自动选择可用提供商
99
- client = InferenceClient(provider="auto", token=HF_TOKEN)
100
 
101
- # 你的模型名称
102
- MODEL_ID = "jiang1002/chatglm-6b-adgen" # 或者换成其他公开模型
103
-
104
- # --- 1. API 接口 ---
105
  @app.post("/generate")
106
  async def generate(request: Request):
107
  try:
@@ -109,7 +46,6 @@ async def generate(request: Request):
109
  prompt = data.get("text", "")
110
  messages = data.get("messages", [])
111
 
112
- # 如果提供了完整的 messages 格式,就用它
113
  if messages:
114
  response = client.chat.completions.create(
115
  model=MODEL_ID,
@@ -117,25 +53,22 @@ async def generate(request: Request):
117
  )
118
  result = response.choices[0].message.content
119
  else:
120
- # 否则用简单的 prompt 格式
121
- response = client.text_generation(
122
- prompt,
123
  model=MODEL_ID,
124
- max_new_tokens=512,
125
- temperature=0.7
126
  )
127
- result = response
128
 
129
  return {"success": True, "result": result}
130
  except Exception as e:
131
  logger.error(f"API 调用失败: {str(e)}")
132
  return {"success": False, "error": str(e)}
133
 
134
- # --- 2. Gradio 聊天界面 ---
135
  def chat_func(message, history):
136
  """Gradio 聊天函数"""
137
  try:
138
- # 历史记录转换为 messages 格式
139
  messages = []
140
  for human, assistant in history:
141
  messages.append({"role": "user", "content": human})
@@ -153,20 +86,20 @@ def chat_func(message, history):
153
  return response.choices[0].message.content
154
  except Exception as e:
155
  logger.error(f"聊天失败: {str(e)}")
156
- logger.error(f"详细错误: {traceback.format_exc()}") # 添加这行
157
- return f"调用失败: {str(e)}\n\n{traceback.format_exc()}"
158
 
159
  # 创建 Gradio 界面
160
  demo = gr.ChatInterface(
161
  fn=chat_func,
162
- title="ChatGLM 广告生成助手 (使用 Inference Providers)",
163
- description="后台使用 Hugging Face Inference Providers,无需本地 GPU"
164
  )
165
 
166
  # 挂载 Gradio
167
  app = gr.mount_gradio_app(app, demo, path="/")
168
 
169
- # 添加健康检查端点
170
  @app.get("/health")
171
  async def health():
172
  return {"status": "ok", "model": MODEL_ID}
 
10
  logging.basicConfig(level=logging.INFO)
11
  logger = logging.getLogger(__name__)
12
 
13
# --- Application bootstrap ---------------------------------------------------
# Creates the FastAPI app, reads credentials, picks the backend model, builds
# the shared inference client, and runs a one-shot startup smoke test.
app = FastAPI()

# Hugging Face access token from the environment. Startup continues without
# it, but every inference call downstream will fail.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    logger.error("❌ HF_TOKEN 环境变量未设置!")

# Backend model served by this app (edit here to switch).
MODEL_ID = "meta-llama/Llama-3.2-3B-Instruct"  # lightweight, free tier, handles Chinese
# MODEL_ID = "microsoft/Phi-3.5-mini-instruct"  # alternative 1
# MODEL_ID = "Qwen/Qwen2.5-7B-Instruct"         # alternative 2

# Shared inference client; provider="auto" lets Hugging Face route the call
# to whichever inference provider is currently available for MODEL_ID.
client = InferenceClient(provider="auto", token=HF_TOKEN)

# Startup smoke test: fire one tiny chat completion so a bad token or an
# unavailable model shows up in the logs immediately instead of on the
# first user request. Failure is logged, not fatal.
try:
    logger.info(f"正在测试模型 {MODEL_ID}...")
    probe = client.chat.completions.create(
        model=MODEL_ID,
        messages=[{"role": "user", "content": "你好"}],
        max_tokens=10,
    )
    logger.info(f"✅ 模型可用!响应: {probe.choices[0].message.content}")
except Exception as e:
    logger.error(f"❌ 模型测试失败: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
+ # --- API 接口 ---
 
 
 
42
  @app.post("/generate")
43
  async def generate(request: Request):
44
  try:
 
46
  prompt = data.get("text", "")
47
  messages = data.get("messages", [])
48
 
 
49
  if messages:
50
  response = client.chat.completions.create(
51
  model=MODEL_ID,
 
53
  )
54
  result = response.choices[0].message.content
55
  else:
56
+ response = client.chat.completions.create(
 
 
57
  model=MODEL_ID,
58
+ messages=[{"role": "user", "content": prompt}]
 
59
  )
60
+ result = response.choices[0].message.content
61
 
62
  return {"success": True, "result": result}
63
  except Exception as e:
64
  logger.error(f"API 调用失败: {str(e)}")
65
  return {"success": False, "error": str(e)}
66
 
67
+ # --- Gradio 聊天界面 ---
68
  def chat_func(message, history):
69
  """Gradio 聊天函数"""
70
  try:
71
+ # 转换历史记录
72
  messages = []
73
  for human, assistant in history:
74
  messages.append({"role": "user", "content": human})
 
86
  return response.choices[0].message.content
87
  except Exception as e:
88
  logger.error(f"聊天失败: {str(e)}")
89
+ logger.error(traceback.format_exc())
90
+ return f"调用失败: {str(e)}"
91
 
92
# Build the Gradio chat UI; chat_func backs each conversational turn.
demo = gr.ChatInterface(
    chat_func,
    title="AI 聊天助手 (使用 Inference Providers)",
    description=f"后台模型: {MODEL_ID}",
)

# Mount the UI at the web root so a single uvicorn process serves both the
# Gradio front end ("/") and the JSON API endpoints.
app = gr.mount_gradio_app(app, demo, path="/")
101
 
102
# Liveness probe for container / Space health checks.
@app.get("/health")
async def health():
    """Report service liveness and the configured backend model."""
    payload = {"status": "ok", "model": MODEL_ID}
    return payload