Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,110 +7,96 @@ from pathlib import Path
|
|
| 7 |
|
| 8 |
# --- 核心配置 ---
|
| 9 |
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
|
| 10 |
-
MODEL_ID = "google/gemini-2.0-flash-001"
|
| 11 |
|
| 12 |
-
# --- 你的专属 HTML 声明 (
|
| 13 |
-
INFO_HTML = """
|
| 14 |
-
<div style="text-align: left; border-left: 4px solid #2196F3; padding-left: 15px; margin-bottom: 20px;">
|
| 15 |
<h3>MG TaxAI | 跨境财税合规实验室 (Beta)</h3>
|
| 16 |
-
<p>本系统依托 <b>MG 核心智库</b> 构建
|
| 17 |
-
|
| 18 |
-
<hr style="border: 0; border-top: 1px solid #eee; margin: 10px 0;">
|
| 19 |
-
<p style="font-size: 0.85em; color: #666;">
|
| 20 |
-
<b>⚠️ AI 免责声明:</b><br>
|
| 21 |
-
本系统生成的内容由人工智能根据现有库文件分析得出,不构成正式的法律或税务建议。在使用本系统结果进行任何商业决策前,请务必咨询 MG Consult 专业团队。
|
| 22 |
-
</p>
|
| 23 |
-
</div>
|
| 24 |
-
"""
|
| 25 |
|
| 26 |
# --- 深度知识库检索引擎 (RAG) ---
|
| 27 |
def get_knowledge_context(query):
|
| 28 |
-
"""
|
| 29 |
-
扫描本地 Treaties 和 InvestmentGuide 文件夹中的相关 PDF 内容。
|
| 30 |
-
这里实现一个基础的关键词匹配检索,确保 AI 能“读”到书。
|
| 31 |
-
"""
|
| 32 |
context_chunks = []
|
| 33 |
base_dirs = ["Treaties", "InvestmentGuide"]
|
| 34 |
-
|
| 35 |
-
# 简单的关键词提取(可以根据需要改进)
|
| 36 |
keywords = [word for word in query.split() if len(word) > 1]
|
| 37 |
|
| 38 |
for folder in base_dirs:
|
| 39 |
path = Path(folder)
|
| 40 |
if not path.exists(): continue
|
| 41 |
|
| 42 |
-
# 扫描文件夹下的 PDF
|
| 43 |
for pdf_file in path.rglob("*.pdf"):
|
| 44 |
-
# 如果文件名包含关键词,优先读取
|
| 45 |
if any(kw.lower() in pdf_file.name.lower() for kw in keywords):
|
| 46 |
try:
|
| 47 |
with fitz.open(pdf_file) as doc:
|
| 48 |
-
#
|
| 49 |
-
text = "".join([page.get_text() for page in doc[:
|
| 50 |
context_chunks.append(f"来自文件 [{pdf_file.name}]:\n{text}")
|
| 51 |
except:
|
| 52 |
continue
|
| 53 |
|
| 54 |
-
|
|
|
|
| 55 |
|
| 56 |
# --- API 专家级调用逻辑 ---
|
| 57 |
def ask_ai(message, history):
|
| 58 |
if not OPENROUTER_API_KEY:
|
| 59 |
-
return "⚠️ 未检测到 API Key,请
|
| 60 |
|
| 61 |
-
# 1. 获取本地知识库背景
|
| 62 |
local_context = get_knowledge_context(message)
|
| 63 |
|
| 64 |
-
#
|
| 65 |
-
# 这里决定了回复的质量和长度
|
| 66 |
system_instruction = """
|
| 67 |
-
你是一位资深的 MG Consulting 国际税务AI。
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
|
|
|
| 73 |
"""
|
| 74 |
|
| 75 |
-
# 3. 组装对话历史
|
| 76 |
messages = [{"role": "system", "content": system_instruction}]
|
|
|
|
|
|
|
| 77 |
for user_msg, assistant_msg in history:
|
| 78 |
messages.append({"role": "user", "content": user_msg})
|
| 79 |
messages.append({"role": "assistant", "content": assistant_msg})
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
current_input = f"【参考知识库内容】:\n{local_context}\n\n【用户当前咨询】:\n{message}"
|
| 83 |
messages.append({"role": "user", "content": current_input})
|
| 84 |
|
| 85 |
-
# 5. 发送请求
|
| 86 |
-
url = "https://openrouter.ai/api/v1/chat/completions"
|
| 87 |
-
headers = {"Authorization": f"Bearer {OPENROUTER_API_KEY}", "Content-Type": "application/json"}
|
| 88 |
payload = {
|
| 89 |
"model": MODEL_ID,
|
| 90 |
"messages": messages,
|
| 91 |
-
"temperature": 0.2,
|
| 92 |
"top_p": 0.9
|
| 93 |
}
|
| 94 |
|
| 95 |
try:
|
| 96 |
-
response = requests.post(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
if response.status_code == 200:
|
| 98 |
return response.json()['choices'][0]['message']['content']
|
| 99 |
-
return f"❌ 接口响应异常 ({response.status_code})
|
| 100 |
except Exception as e:
|
| 101 |
-
return f"💥 系统连接超时
|
| 102 |
|
| 103 |
# --- 界面构建 ---
|
| 104 |
-
with gr.Blocks(title="MG TaxAI Lab", fill_height=True
|
| 105 |
gr.HTML(INFO_HTML)
|
| 106 |
-
|
| 107 |
-
chatbot = gr.ChatInterface(
|
| 108 |
fn=ask_ai,
|
| 109 |
fill_height=True,
|
| 110 |
retry_btn="🔄 重新生成",
|
| 111 |
-
undo_btn="↩️ 撤回
|
| 112 |
-
clear_btn="🗑️ 清空
|
| 113 |
)
|
| 114 |
|
| 115 |
if __name__ == "__main__":
|
| 116 |
-
|
|
|
|
|
|
| 7 |
|
| 8 |
# --- Core configuration ---
# OpenRouter API key read from the environment; None when the variable is unset.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
# Model identifier sent in the "model" field of every chat-completion request.
MODEL_ID = "google/gemini-2.0-flash-001"

# --- Banner HTML shown above the chat interface ---
# The AI disclaimer paragraph was dropped when this literal was shortened, even
# though the banner was meant to stay unchanged; it is restored here.
# NOTE(review): the first paragraph still ends in a literal "..." placeholder;
# restore the full sentence from the previous revision of this file.
INFO_HTML = """<div style="text-align: left; border-left: 4px solid #2196F3; padding-left: 15px; margin-bottom: 20px;">
<h3>MG TaxAI | 跨境财税合规实验室 (Beta)</h3>
<p>本系统依托 <b>MG 核心智库</b> 构建...</p>
<hr style="border: 0; border-top: 1px solid #eee; margin: 10px 0;">
<p style="font-size: 0.85em; color: #666;">
<b>⚠️ AI 免责声明:</b><br>
本系统生成的内容由人工智能根据现有库文件分析得出,不构成正式的法律或税务建议。在使用本系统结果进行任何商业决策前,请务必咨询 MG Consult 专业团队。
</p>
</div>"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
# --- 深度知识库检索引擎 (RAG) ---
|
| 19 |
def get_knowledge_context(query, max_pages=3, max_chars=6000):
    """Collect text from local PDFs whose file name contains a query keyword.

    The query is split on whitespace and words of a single character are
    dropped. Every ``*.pdf`` found recursively under the ``Treaties`` and
    ``InvestmentGuide`` folders (relative to the working directory) whose
    file name contains one of the keywords, case-insensitively, contributes
    the text of its leading pages.

    NOTE(review): whitespace splitting yields a single keyword for a query
    written without spaces (typical for Chinese text), so such queries only
    match when the whole query appears in a file name.

    Args:
        query: The user's question.
        max_pages: Number of leading pages read from each matching PDF.
        max_chars: Upper bound on the length of the returned string.

    Returns:
        The matching excerpts joined by blank lines and truncated to
        ``max_chars`` characters; an empty string when nothing matches.
    """
    import logging  # local import: this block cannot rely on a file-level one

    # Lower-case once here instead of once per file inside the loop.
    keywords = {word.lower() for word in (query or "").split() if len(word) > 1}
    if not keywords:
        return ""

    context_chunks = []
    for folder in ("Treaties", "InvestmentGuide"):
        root = Path(folder)
        if not root.is_dir():
            continue

        # Sorted so the result does not depend on filesystem listing order,
        # which matters because the joined text is truncated below.
        for pdf_file in sorted(root.rglob("*.pdf")):
            file_name = pdf_file.name.lower()
            if not any(keyword in file_name for keyword in keywords):
                continue
            try:
                with fitz.open(pdf_file) as doc:
                    # Pages are loaded by index rather than by slicing the
                    # document. NOTE(review): slice support on the document
                    # object may vary between PyMuPDF versions - confirm.
                    page_count = min(len(doc), max_pages)
                    text = "".join(
                        doc[index].get_text() for index in range(page_count)
                    )
            except Exception:
                # Best effort: one unreadable file must not break the answer,
                # but the failure is logged instead of silently discarded.
                logging.getLogger(__name__).warning(
                    "Skipping unreadable PDF %s", pdf_file, exc_info=True
                )
                continue
            context_chunks.append(f"来自文件 [{pdf_file.name}]:\n{text}")

    # Returned only after every folder has been scanned.
    return "\n\n".join(context_chunks)[:max_chars]
|
| 40 |
|
| 41 |
# --- API 专家级调用逻辑 ---
|
| 42 |
def _history_to_messages(history):
    """Convert Gradio chat history into a list of role/content message dicts.

    Accepts both the pair format (``[user, assistant]`` per turn) and the
    dict format (``{"role": ..., "content": ...}`` per entry). Entries whose
    content is empty or not a string are skipped.
    """
    converted = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            content = turn.get("content")
            if role in ("user", "assistant") and isinstance(content, str) and content:
                converted.append({"role": role, "content": content})
            continue

        user_msg, assistant_msg = turn
        if isinstance(user_msg, str) and user_msg:
            converted.append({"role": "user", "content": user_msg})
        # The assistant half of a turn can be None, e.g. an unanswered turn.
        if isinstance(assistant_msg, str) and assistant_msg:
            converted.append({"role": "assistant", "content": assistant_msg})
    return converted


def ask_ai(message, history):
    """Answer a chat message through the OpenRouter chat-completions API.

    The request consists of the system instruction, the earlier turns from
    ``history``, and the current message prefixed with the excerpts returned
    by ``get_knowledge_context``.

    Args:
        message: The user's current question.
        history: Earlier turns as supplied by the chat UI, either as
            user/assistant pairs or as role/content dicts.

    Returns:
        The model's reply text, or a user-facing error string when the API
        key is missing, the request fails, or the response is unusable.
    """
    if not OPENROUTER_API_KEY:
        return "⚠️ 未检测到 API Key,请在 Space 的 Settings -> Secrets 中添加。"

    local_context = get_knowledge_context(message)

    # System instruction: sets the risk-avoidance rules and the expected depth.
    system_instruction = """
    你是一位资深的 MG Consulting 国际税务专家级 AI。

    【核心准则】:
    1. 专业性:优先引用参考知识库。若背景不足,基于 2025-2026 最新全球财税准则回答。
    2. 避险:在讨论行业趋势时,使用“大型咨询机构”或“核心智库”等统称,**严禁提及具体的国际会计师事务所名称**。
    3. 风格:直接进入分析,不进行冗长的自我介绍,使用 Markdown 格式(标题、列表、粗体)。
    4. 深度:分析需涵盖税种差异(Income Tax, VAT, Withholding Tax)及双边协定(DTA)影响。
    """

    messages = [{"role": "system", "content": system_instruction}]
    messages.extend(_history_to_messages(history))

    current_input = f"【参考知识库】:\n{local_context}\n\n【用户咨询】:\n{message}"
    messages.append({"role": "user", "content": current_input})

    payload = {
        "model": MODEL_ID,
        "messages": messages,
        "temperature": 0.2,
        "top_p": 0.9,
    }

    try:
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                "Content-Type": "application/json",
            },
            json=payload,
            timeout=60,
        )
    except requests.RequestException as e:
        # Only transport-level failures are reported as a connection problem.
        return f"💥 系统连接超时: {str(e)}"

    if response.status_code != 200:
        return f"❌ 接口响应异常 ({response.status_code})"

    try:
        content = response.json()['choices'][0]['message']['content']
    except (ValueError, KeyError, IndexError, TypeError):
        # A 200 response without the expected shape is an interface error,
        # not a timeout.
        return f"❌ 接口响应异常 ({response.status_code})"

    if not isinstance(content, str):
        return f"❌ 接口响应异常 ({response.status_code})"
    return content
|
| 88 |
|
| 89 |
# --- 界面构建 ---
|
| 90 |
+
# Assemble the page: the info banner followed by the chat interface.
with gr.Blocks(title="MG TaxAI Lab", fill_height=True) as demo:
    gr.HTML(INFO_HTML)
    # Keyword arguments are collected first so the button labels read as one table.
    chat_options = {
        "fn": ask_ai,
        "fill_height": True,
        "retry_btn": "🔄 重新生成",
        "undo_btn": "↩️ 撤回",
        "clear_btn": "🗑️ 清空",
    }
    gr.ChatInterface(**chat_options)

if __name__ == "__main__":
    # Listen on all interfaces at port 7860; the original note states that
    # Hugging Face requires this host and port.
    demo.launch(server_name="0.0.0.0", server_port=7860)
|