import os
import sys
import traceback

from retriever_builder import process_pdfs_to_chunks, save_embeddings
from conversation_manager import PlannerAgent

# NOTE(review): this append runs after the project imports above have already
# executed, so it only affects imports performed later — confirm it is needed.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

# Unpack the bundled PDF archive at deploy time. Extraction stays best-effort
# (a failure does not abort start-up), but a non-zero exit status is reported
# instead of being silently discarded.
_extract_status = os.system("tar -xzvf /home/user/app/pdf_files.tar.gz")
if _extract_status != 0:
    print(f"!!! WARNING: extracting pdf_files.tar.gz exited with status {_extract_status}")

# Initialise the vector database from the extracted PDFs.
pdf_paths = "./pdf_files"
all_chunks = process_pdfs_to_chunks(pdf_paths)
vectordb = save_embeddings(
    all_chunks,
    persist_directory='./data_base/vector_db/chroma',
    overwrite=True,  # whether to overwrite (i.e. whether documents were added)
)
retriever = vectordb.as_retriever(search_kwargs={"k": 4})

# Single module-level agent instance, kept so that it can serve multi-turn
# conversations.
agent = PlannerAgent(retriever=retriever)

# Placeholder used for any profile field the caller did not supply.
_UNKNOWN = "未知"

# Category returned when no keyword matches.
_DEFAULT_JOB_TYPE = "其他"

# (category, keywords) pairs, checked in this order; the first category with
# a keyword contained in the job name wins.
_JOB_TYPE_KEYWORDS = (
    # New-economy / platform jobs.
    ("新业态就业", ("外卖", "快递", "网约车", "主播", "骑手", "平台", "直播", "自媒体")),
    # Flexible employment.
    ("灵活就业", ("自由", "个体户", "兼职", "临时工", "接单", "顾问", "自由职业者")),
    # Urban employees (white-collar / technical roles, etc.).
    ("城镇职工", ("公司", "企业", "工程师", "职员", "护士", "程序员")),
    # Urban and rural residents.
    ("城乡居民", ("农民", "养殖户", "渔民", "果农", "农业工人", "林业工人", "乡村医生")),
)


def classify_job_type(job_name: str) -> str:
    """Map a free-text job name to one of the known job categories.

    The name is lower-cased and anything from the first ``(`` onwards is
    dropped before matching. Categories are tried in the order of
    ``_JOB_TYPE_KEYWORDS``; the first one with a keyword occurring as a
    substring of the name is returned.

    Args:
        job_name: Job description as entered by the user.

    Returns:
        The matching category label, or ``"其他"`` when nothing matches or
        when ``job_name`` is not a string (e.g. ``None``).
    """
    # Guard against a missing value so the caller does not crash on `.lower()`.
    if not isinstance(job_name, str):
        return _DEFAULT_JOB_TYPE

    normalized = job_name.lower().split('(')[0]
    for job_type, keywords in _JOB_TYPE_KEYWORDS:
        if any(keyword in normalized for keyword in keywords):
            return job_type
    return _DEFAULT_JOB_TYPE


def user_asks(message, history, *args):
    """Stream the agent's reply to ``message`` as successive chat histories.

    Args:
        message: The user's new message.
        history: Existing chat history as a list of ``{"role", "content"}``
            dicts; ``None`` is treated as an empty history.
        *args: Up to six optional profile fields, in order: goal, job,
            situation, city, age, other info. Missing trailing fields
            default to ``"未知"``.

    Yields:
        ``(history, "")`` tuples: the history extended with the user message
        and the assistant reply accumulated so far, plus an empty string
        (used to clear the input box).
    """
    # Pad the optional positional fields so they can be unpacked in one go.
    fields = list(args[:6])
    fields += [_UNKNOWN] * (6 - len(fields))
    user_goal, job_input, situation, city, age, other_info = fields

    # Derive the job category automatically from the free-text job field.
    job_type = classify_job_type(job_input)

    user_info = {
        "goal": user_goal,
        "job_info": job_input,
        "job_type": job_type,
        "city": city,
        "age": age,
        "situation": situation,
        "other_info": other_info,
    }

    # Build a new list (the caller's history is not mutated) ending with an
    # empty assistant message that is filled in as chunks arrive.
    current_history = (history or []) + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": ""},
    ]

    # Show the user's message immediately and clear the input box.
    yield current_history, ""

    full_response = ""
    try:
        for chunk in agent.stream_reply(message, user_info):
            print(f"--- Received chunk from agent: '{chunk}' ---")
            full_response += chunk
            current_history[-1]["content"] = full_response
            yield current_history, ""
        print("--- Agent streaming loop finished ---")
    except Exception as e:
        # Top-level boundary: log the failure and show it in the chat rather
        # than letting the generator die mid-stream.
        print(f"!!! ERROR: An exception occurred during agent.stream_reply or its iteration: {e}")
        traceback.print_exc()
        current_history[-1]["content"] = f"抱歉,系统内部发生错误,无法生成回复。错误详情:{e}"
        yield current_history, ""

    print("--- Exiting user_asks ---")