ChienChung commited on
Commit
e26601a
·
verified ·
1 Parent(s): 6296028

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -148
app.py CHANGED
@@ -4,7 +4,6 @@ import shutil
4
  import json
5
  import torch
6
  import transformers
7
- import chardet
8
  from transformers import AutoModelForCausalLM, AutoTokenizer
9
  from transformers.models.llama.configuration_llama import LlamaConfig
10
  from huggingface_hub import hf_hub_download
@@ -45,11 +44,7 @@ from serpapi import GoogleSearch
45
  # CrewAI 部分:完全使用 CrewAI 的 Agent、Task、Crew 與 @tool 裝飾器
46
  from crewai import Crew, Agent, Task, Process
47
  from crewai.tools import tool
48
- from langchain_experimental.agents import create_pandas_dataframe_agent
49
 
50
- session_retriever = None
51
- session_qa_chain = None
52
- csv_dataframe = None # CSV tool will use this
53
  # === Model and Device Setup ===
54
  if torch.backends.mps.is_available():
55
  device = "mps"
@@ -138,12 +133,7 @@ Answer:
138
  )
139
 
140
  llm_local = HuggingFacePipeline(pipeline=query_pipeline)
141
- llm_gpt4 = ChatOpenAI(model_name="gpt-4o-mini", temperature=0.2, openai_api_key=openai_api_key)
142
- crew_llm = ChatOpenAI(
143
- model_name="gpt-4o-mini",
144
- temperature=0.2,
145
- openai_api_key=openai_api_key
146
- )
147
 
148
  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
149
  qa_gpt = ConversationalRetrievalChain.from_llm(
@@ -244,16 +234,12 @@ def document_summarize(file):
244
  summary = summarize_chain.invoke(docs)
245
  return summary['output_text']
246
 
247
-
248
  def csv_agent(file, query):
249
  file_path = get_file_path(file)
250
  if file_path is None:
251
  return "Unable to obtain the uploaded CSV file."
252
  try:
253
- with open(file_path, 'rb') as f:
254
- result = chardet.detect(f.read())
255
- encoding = result['encoding']
256
- df = pd.read_csv(file_path, encoding=encoding)
257
  except Exception as e:
258
  return f"Error reading CSV: {e}"
259
  safe_dict = {"df": df, "pd": pd}
@@ -312,7 +298,7 @@ class SimpleQuery(BaseModel):
312
 
313
  @tool("summarise")
314
  def summarise_tool(query: str) -> str:
315
- """Summarise: Use document summarisation functionality."""
316
  global session_retriever, session_qa_chain
317
  if session_retriever is None:
318
  return "尚未上傳文件。"
@@ -328,7 +314,7 @@ def summarise_tool(query: str) -> str:
328
 
329
  @tool("python_calc")
330
  def python_calc_tool(query: str) -> str:
331
- """Python Calculation: Perform basic arithmetic or logical operations."""
332
  try:
333
  return str(eval(query))
334
  except Exception as e:
@@ -336,12 +322,12 @@ def python_calc_tool(query: str) -> str:
336
 
337
  @tool("search_agent")
338
  def search_tool_func(query: str) -> str:
339
- """Search: Perform web searches using external search engines."""
340
  return search_agent(query)
341
 
342
  @tool("uploaded_qa")
343
  def uploaded_qa_tool_func(query: str) -> str:
344
- """Document QA: Answer questions based on the uploaded document content."""
345
  global session_qa_chain
346
  if session_qa_chain is not None:
347
  try:
@@ -350,105 +336,52 @@ def uploaded_qa_tool_func(query: str) -> str:
350
  return f"文檔問答錯誤: {e}"
351
  else:
352
  return "尚未上傳文件。"
353
-
354
- @tool("csv_agent")
355
- def csv_tool_func(query: str) -> str:
356
- """CSV Agent: Use natural language to analyse uploaded CSV files."""
357
- global csv_dataframe
358
- if csv_dataframe is None:
359
- return "No CSV file uploaded."
360
- try:
361
- agent = create_pandas_dataframe_agent(llm=llm_gpt4, df=csv_dataframe, verbose=True)
362
- return agent.run(f"Here is the table:\n{csv_dataframe.head().to_string(index=False)}\n\n{query}")
363
- except Exception as e:
364
- return f"CSV Agent error: {e}"
365
 
366
  # 建立 CrewAI 代理(僅針對 Tab 5)
367
  summarizer_agent = Agent(
368
- role="Document Summarizer",
369
- goal="Summarise the content of the uploaded document.",
370
- backstory="You are a professional summarisation expert who can identify key points in long documents.",
371
  tools=[summarise_tool],
372
  verbose=True
373
  )
374
  document_qa_agent = Agent(
375
- role="Document QA Specialist",
376
- goal="Answer questions based on the uploaded document.",
377
- backstory="You are an expert in document understanding and can accurately extract answers.",
378
  tools=[uploaded_qa_tool_func],
379
  verbose=True
380
  )
381
-
382
- search_agent = Agent(
383
- role="Search Expert",
384
- goal="Search the web and provide relevant information.",
385
- backstory="You are an expert at finding relevant information from the internet.",
386
- tools=[search_tool_func],
387
  verbose=True
388
  )
389
 
390
- math_agent = Agent(
391
- role="Math Assistant",
392
- goal="Perform accurate arithmetic or logical calculations.",
393
- backstory="You are a calculator expert skilled at quick computations.",
394
- tools=[python_calc_tool],
395
- verbose=True
396
- )
397
- csv_agent = Agent(
398
- role="CSV Analyst",
399
- goal="Analyse tabular data and answer questions about the uploaded CSV file.",
400
- backstory="You are skilled in interpreting tabular datasets and can extract numerical or logical insights.",
401
- tools=[csv_tool_func],
402
- verbose=True
403
- )
404
- router_agent = Agent(
405
- role="Query Router",
406
- goal="Determine the most suitable agent or tool to handle the user query.",
407
- backstory="You are an intelligent query dispatcher that analyses the user's intent and chooses the best AI agent to answer.",
408
- tools=[python_calc_tool, search_tool_func, csv_tool_func, uploaded_qa_tool_func, summarise_tool],
409
- verbose=True
410
- )
411
  router_task = Task(
412
- description=(
413
- "Based on the user's query, decide which agent or tool is best suited to handle it:\n"
414
- "- If the query is related to the content of an uploaded file (e.g., 'what is this document about?'), send it to the **Document QA Agent**.\n"
415
- "- If the query contains words like 'summarize', 'summary', or 'main points', use the **Summarizer Agent**.\n"
416
- "- If the query involves numbers, calculations, or logic (e.g., '50 * 23 - 5', 'what is 10% of 800'), send it to the **Math Agent**.\n"
417
- "- If the user uploaded a CSV file and asks about table content, data trends, or uses words like 'data', 'table', 'csv', 'column', or 'row', send it to the **CSV Agent**.\n"
418
- "- If the user asks about current events, trending topics, or online information (e.g., 'What is LangChain?', 'latest news'), send it to the **Search Agent**.\n"
419
- "- If none of these apply, use your best judgment to choose the most relevant agent."
420
- ),
421
- expected_output="The final answer from the selected agent or tool.",
422
- agent=router_agent,
423
- input_variables=["query"]
424
  )
425
 
426
  crew = Crew(
427
- agents=[summarizer_agent, document_qa_agent, search_agent, math_agent, csv_agent],
428
  tasks=[router_task],
429
  process=Process.sequential,
430
- verbose=True,
431
- llm=crew_llm
432
  )
433
 
434
  def multi_agent_chat(query: str) -> str:
435
- print(f"Routing query: {query}")
436
  try:
437
- result = crew.kickoff(inputs={"query": query})
438
- result_str = str(result)
439
- if "I don't know." in result_str or result_str.strip() == "":
440
- return search_agent(query) # fallback 給搜尋
441
- step = result.steps[-1] if result and hasattr(result, "steps") else None
442
- agent_name = step.agent.name if step else "Unknown"
443
- output = step.output if step else str(result)
444
- return f"[Agent: {agent_name}]\n{output}"
445
  except Exception as e:
446
  return f"Error: {e}"
447
 
448
  def multi_agent_chat_advanced(query: str, file=None) -> str:
449
  global session_retriever, session_qa_chain
450
-
451
- # 判斷是否為與文件無關的查詢
452
  non_doc_keywords = ["calculate", "sum", "date", "time", "how many", "how much", "weather", "temperature"]
453
  use_file_chain = True
454
  for kw in non_doc_keywords:
@@ -460,31 +393,12 @@ def multi_agent_chat_advanced(query: str, file=None) -> str:
460
  file_path = get_file_path(file)
461
  if file_path is None:
462
  return "Unable to process the file format."
463
-
464
- # === CSV 處理 ===
465
  if file_path.lower().endswith(".csv"):
466
- global csv_dataframe
467
- try:
468
- with open(file_path, 'rb') as f:
469
- result = chardet.detect(f.read())
470
- encoding = result['encoding']
471
- df = pd.read_csv(file_path, encoding=encoding)
472
- csv_dataframe = df
473
- result = crew.kickoff(inputs={"query": query})
474
- step = result.steps[-1] if result and hasattr(result, "steps") else None
475
- agent_name = step.agent.name if step else "Unknown"
476
- output = step.output if step else str(result)
477
- return f"[Agent: {agent_name}]\n{output}"
478
- except Exception as e:
479
- return f"Error reading CSV: {e}"
480
-
481
- # === 文本類型文件(PDF / DOCX / TXT) ===
482
  elif file_path.lower().endswith((".pdf", ".txt", ".docx")):
483
- loader = (
484
- PyPDFLoader(file_path) if file_path.lower().endswith(".pdf")
485
- else UnstructuredWordDocumentLoader(file_path) if file_path.lower().endswith(".docx")
486
- else TextLoader(file_path)
487
- )
488
  docs = loader.load()
489
  chunks = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(docs)
490
  db = FAISS.from_documents(chunks, embeddings)
@@ -493,55 +407,28 @@ def multi_agent_chat_advanced(query: str, file=None) -> str:
493
  llm=llm_gpt4,
494
  retriever=session_retriever,
495
  memory=ConversationBufferMemory(memory_key="chat_history", return_messages=True),
496
- #combine_docs_chain_kwargs={"prompt": custom_prompt}
497
  )
498
-
499
- # 決定使用摘要還是 QA
500
- if any(kw in query.lower() for kw in ["summarize", "summary", "摘要", "總結"]):
501
- return document_summarize(file_path)
502
- elif use_file_chain:
503
- try:
504
- return session_qa_chain.run(query)
505
- except Exception as e:
506
- return f"Error: {e}"
507
  else:
508
  try:
509
- result = crew.kickoff(inputs={"query": query})
510
- step = result.steps[-1] if result and hasattr(result, "steps") else None
511
- agent_name = step.agent.name if step else "Unknown"
512
- output = step.output if step else str(result)
513
- return f"[Agent: {agent_name}]\n{output}"
514
  except Exception as e:
515
  return f"Error: {e}"
516
-
517
  else:
518
  return "Unsupported file format."
519
-
520
- # 沒有上傳新檔案
521
  elif session_qa_chain is not None:
522
  if use_file_chain:
523
- try:
524
- return session_qa_chain.run(query)
525
- except Exception as e:
526
- return f"Error: {e}"
527
  else:
528
  try:
529
- result = crew.kickoff(inputs={"query": query})
530
- step = result.steps[-1] if result and hasattr(result, "steps") else None
531
- agent_name = step.agent.name if step else "Unknown"
532
- output = step.output if step else str(result)
533
- return f"[Agent: {agent_name}]\n{output}"
534
  except Exception as e:
535
  return f"Error: {e}"
536
-
537
- # 沒有 session,直接丟給 CrewAI
538
  else:
539
  try:
540
- result = crew.kickoff(inputs={"query": query})
541
- step = result.steps[-1] if result and hasattr(result, "steps") else None
542
- agent_name = step.agent.name if step else "Unknown"
543
- output = step.output if step else str(result)
544
- return f"[Agent: {agent_name}]\n{output}"
545
  except Exception as e:
546
  return f"Error: {e}"
547
 
 
4
  import json
5
  import torch
6
  import transformers
 
7
  from transformers import AutoModelForCausalLM, AutoTokenizer
8
  from transformers.models.llama.configuration_llama import LlamaConfig
9
  from huggingface_hub import hf_hub_download
 
44
  # CrewAI 部分:完全使用 CrewAI 的 Agent、Task、Crew 與 @tool 裝飾器
45
  from crewai import Crew, Agent, Task, Process
46
  from crewai.tools import tool
 
47
 
 
 
 
48
  # === Model and Device Setup ===
49
  if torch.backends.mps.is_available():
50
  device = "mps"
 
133
  )
134
 
135
  llm_local = HuggingFacePipeline(pipeline=query_pipeline)
136
+ llm_gpt4 = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.2, openai_api_key=openai_api_key)
 
 
 
 
 
137
 
138
  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
139
  qa_gpt = ConversationalRetrievalChain.from_llm(
 
234
  summary = summarize_chain.invoke(docs)
235
  return summary['output_text']
236
 
 
237
  def csv_agent(file, query):
238
  file_path = get_file_path(file)
239
  if file_path is None:
240
  return "Unable to obtain the uploaded CSV file."
241
  try:
242
+ df = pd.read_csv(file_path)
 
 
 
243
  except Exception as e:
244
  return f"Error reading CSV: {e}"
245
  safe_dict = {"df": df, "pd": pd}
 
298
 
299
  @tool("summarise")
300
  def summarise_tool(query: str) -> str:
301
+ """Summarise: 使用文件摘要功能。"""
302
  global session_retriever, session_qa_chain
303
  if session_retriever is None:
304
  return "尚未上傳文件。"
 
314
 
315
  @tool("python_calc")
316
  def python_calc_tool(query: str) -> str:
317
+ """Python Calculation: 執行簡單計算。"""
318
  try:
319
  return str(eval(query))
320
  except Exception as e:
 
322
 
323
  @tool("search_agent")
324
  def search_tool_func(query: str) -> str:
325
+ """Search: 執行網路搜尋。"""
326
  return search_agent(query)
327
 
328
  @tool("uploaded_qa")
329
  def uploaded_qa_tool_func(query: str) -> str:
330
+ """Document QA: 根據上傳文件回答問題。"""
331
  global session_qa_chain
332
  if session_qa_chain is not None:
333
  try:
 
336
  return f"文檔問答錯誤: {e}"
337
  else:
338
  return "尚未上傳文件。"
 
 
 
 
 
 
 
 
 
 
 
 
339
 
340
  # 建立 CrewAI 代理(僅針對 Tab 5)
341
  summarizer_agent = Agent(
342
+ role="文件摘要助手",
343
+ goal="對上傳文件內容進行摘要",
344
+ backstory="你是一位專業的摘要專家,能抓住長文的重點。",
345
  tools=[summarise_tool],
346
  verbose=True
347
  )
348
  document_qa_agent = Agent(
349
+ role="文件問答專家",
350
+ goal="根據上傳文件回答問題",
351
+ backstory="你精通文檔內容,能從中找出問題答案。",
352
  tools=[uploaded_qa_tool_func],
353
  verbose=True
354
  )
355
+ general_agent = Agent(
356
+ role="綜合助手",
357
+ goal="回答一般問題,執行計算與網路搜尋",
358
+ backstory="你是一位多才多藝的AI助理,能根據需要使用工具。",
359
+ tools=[python_calc_tool, search_tool_func],
 
360
  verbose=True
361
  )
362
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
363
  router_task = Task(
364
+ description="根據使用者查詢自動決定使用哪個工具進行回答。",
365
+ expected_output="最終回答",
366
+ agent=general_agent
 
 
 
 
 
 
 
 
 
367
  )
368
 
369
  crew = Crew(
370
+ agents=[summarizer_agent, document_qa_agent, general_agent],
371
  tasks=[router_task],
372
  process=Process.sequential,
373
+ verbose=True
 
374
  )
375
 
376
  def multi_agent_chat(query: str) -> str:
 
377
  try:
378
+ return crew.run(query)
 
 
 
 
 
 
 
379
  except Exception as e:
380
  return f"Error: {e}"
381
 
382
  def multi_agent_chat_advanced(query: str, file=None) -> str:
383
  global session_retriever, session_qa_chain
384
+ # 定義一些明顯與文件無關的關鍵字
 
385
  non_doc_keywords = ["calculate", "sum", "date", "time", "how many", "how much", "weather", "temperature"]
386
  use_file_chain = True
387
  for kw in non_doc_keywords:
 
393
  file_path = get_file_path(file)
394
  if file_path is None:
395
  return "Unable to process the file format."
 
 
396
  if file_path.lower().endswith(".csv"):
397
+ return csv_agent(file, query)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
398
  elif file_path.lower().endswith((".pdf", ".txt", ".docx")):
399
+ loader = (PyPDFLoader(file_path) if file_path.lower().endswith(".pdf")
400
+ else UnstructuredWordDocumentLoader(file_path) if file_path.lower().endswith(".docx")
401
+ else TextLoader(file_path))
 
 
402
  docs = loader.load()
403
  chunks = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(docs)
404
  db = FAISS.from_documents(chunks, embeddings)
 
407
  llm=llm_gpt4,
408
  retriever=session_retriever,
409
  memory=ConversationBufferMemory(memory_key="chat_history", return_messages=True),
410
+ combine_docs_chain_kwargs={"prompt": custom_prompt}
411
  )
412
+ if use_file_chain:
413
+ return session_qa_chain.run(query)
 
 
 
 
 
 
 
414
  else:
415
  try:
416
+ return crew.run(query)
 
 
 
 
417
  except Exception as e:
418
  return f"Error: {e}"
 
419
  else:
420
  return "Unsupported file format."
 
 
421
  elif session_qa_chain is not None:
422
  if use_file_chain:
423
+ return session_qa_chain.run(query)
 
 
 
424
  else:
425
  try:
426
+ return crew.run(query)
 
 
 
 
427
  except Exception as e:
428
  return f"Error: {e}"
 
 
429
  else:
430
  try:
431
+ return crew.run(query)
 
 
 
 
432
  except Exception as e:
433
  return f"Error: {e}"
434