alen commited on
Commit
4820f61
·
verified ·
1 Parent(s): 68a4177

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -139
app.py CHANGED
@@ -1,89 +1,93 @@
1
  import gradio as gr
2
- from langchain_community.document_loaders import PyPDFLoader
3
- from langchain.text_splitter import RecursiveCharacterTextSplitter
4
- from langchain_community.document_loaders import PyPDFLoader
5
- from langchain_community.embeddings import GPT4AllEmbeddings
6
- from langchain_community.vectorstores import FAISS
7
- from langchain.schema.runnable import RunnablePassthrough
8
- # from langchain.prompts import ChatPromptTemplate
9
- # from langchain_community.chat_models import ChatOllama
10
- from prompt_template import *
11
  from langgraph.graph import END, StateGraph
12
- from langchain_community.llms import LlamaCpp
13
-
14
- # local_llm = 'aleni_ox'
15
 
16
- # llm = ChatOllama(model=local_llm,
17
- # keep_alive="3h",
18
- # max_tokens=512,
19
- # temperature=0,
20
- # # callbacks=[StreamingStdOutCallbackHandler()]
21
- # )
22
 
23
  llm = LlamaCpp(
24
  model_path="Llama-3.1-8B-Instruct.Q5_K_M.gguf",
25
  temperature=0,
26
  max_tokens=512,
 
27
  top_p=1,
28
  # callback_manager=callback_manager,
29
  verbose=True, # Verbose is required to pass to the callback manager
30
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
  question_router = router_prompt | llm | JsonOutputParser()
33
  generate_chain = generate_prompt | llm | StrOutputParser()
34
  query_chain = query_prompt | llm | JsonOutputParser()
35
- llm_chain = nomalqa_prompt | llm | StrOutputParser()
36
 
37
- def generate(state):
38
- """
39
- Generate answer
40
 
41
- Args:
42
- state (dict): The current graph state
43
 
44
- Returns:
45
- state (dict): New key added to state, generation, that contains LLM generation
46
- """
 
47
 
48
- print("Step: Đang tạo câu trả lời từ những gì tìm được")
 
 
49
  question = state["question"]
50
  context = state["context"]
51
  # return question, context
52
  return {'question': question, 'context': context}
53
 
54
- # respon=''
55
-
56
- # for chunk in generate_chain.stream({"context": context, "question": question}):
57
- # respon += chunk
58
- # print(chunk, end="", flush=True)
59
 
60
  def transform_query(state):
61
- """
62
- Transform user question to web search
63
-
64
- Args:
65
- state (dict): The current graph state
66
 
67
- Returns:
68
- state (dict): Appended search query
69
- """
70
-
71
  print("Step: Tối ưu câu hỏi của người dùng")
72
  question = state['question']
73
  gen_query = query_chain.invoke({"question": question})
 
74
  search_query = gen_query["query"]
75
  return {"search_query": search_query}
76
 
77
  def web_search(state):
78
- """
79
- Web search based on the question
80
-
81
- Args:
82
- state (dict): The current graph state
83
-
84
- Returns:
85
- state (dict): Appended web results to context
86
- """
87
 
88
  search_query = state['search_query']
89
  print(f'Step: Đang tìm kiếm web cho: "{search_query}"')
@@ -94,112 +98,72 @@ def web_search(state):
94
  return {"context": search_result}
95
 
96
  def route_question(state):
97
- """
98
- route question to web search or generation.
99
-
100
- Args:
101
- state (dict): The current graph state
102
-
103
- Returns:
104
- str: Next node to call
105
- """
106
 
107
  print("Step: Routing Query")
108
  question = state['question']
109
  output = question_router.invoke({"question": question})
110
  print('Lựa chọn của AI là: ', output)
111
- if output == "web_search":
112
  # print("Step: Routing Query to Web Search")
113
  return "websearch"
114
- elif output == 'generate':
115
  # print("Step: Routing Query to Generation")
116
  return "generate"
117
 
118
- workflow = StateGraph(State)
119
- workflow.add_node("websearch", web_search)
120
- workflow.add_node("transform_query", transform_query)
121
- workflow.add_node("generate", generate)
122
-
123
- # Build the edges
124
- workflow.set_conditional_entry_point(
125
- route_question,
126
- {
127
- "websearch": "transform_query",
128
- "generate": "generate",
129
- },
130
- )
131
- workflow.add_edge("transform_query", "websearch")
132
- workflow.add_edge("websearch", "generate")
133
- workflow.add_edge("generate", END)
134
 
135
- # Compile the workflow
136
- local_agent = workflow.compile()
 
 
 
 
 
 
 
 
 
137
 
138
- def run_agent(query):
139
- local_agent.invoke({"question": query})
140
- print("=======")
141
 
142
- def QA(question: str, history: list, type: str):
143
- if 'Agent' in type:
 
 
144
  gr.Info("Đang tạo câu trả lời!")
145
- respon = ''
146
- # print(question)
147
- output = local_agent.invoke({"question": question})
148
- # print(output)
149
  context = output['context']
150
  questions = output['question']
151
- for chunk in generate_chain.stream({"context": context, "question": questions}):
152
- respon += chunk
153
- print(chunk, end="", flush=True)
154
- yield respon
155
- else:
156
- gr.Info("Đang tạo câu trả lời!")
157
- print(question, history)
158
- respon = ''
159
- for chunk in llm_chain.stream(question):
160
- respon += chunk
161
  print(chunk, end="", flush=True)
162
- yield respon
163
-
164
- def create_db(doc: str) -> str:
165
- loader = PyPDFLoader(doc)
166
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=40)
167
-
168
- chunked_documents = loader.load_and_split(text_splitter)
169
- embedding_model = GPT4AllEmbeddings(model_name="all-MiniLM-L6-v2.gguf2.f16.gguf", gpt4all_kwargs={'allow_download': 'True'})
170
- db = FAISS.from_documents(chunked_documents, embedding_model)
171
- gr.Info("Đã tải lên dữ liệu từ PDF!")
172
-
173
- retriever = db.as_retriever(
174
- search_type="similarity",
175
- search_kwargs= {"k": 3}
176
- )
177
- llm_chain = (
178
- {
179
- "context": retriever,
180
- "question": RunnablePassthrough()}
181
- | nomaldoc_prompt
182
- | llm
183
- )
184
-
185
 
186
- with gr.Blocks(fill_height=True) as demo:
187
- with gr.Row(equal_height=True):
 
 
 
188
 
189
- with gr.Column(scale=1):
190
-
191
- democ2 = gr.Interface(
192
- create_db,
193
- [gr.File(file_count='single')],
194
- None,
195
- )
196
- with gr.Column(scale=2):
197
- democ1 = gr.ChatInterface(
198
- QA,
199
- additional_inputs=[gr.Dropdown(["None", "Agent", "Doc"], label="Type", info="Chọn một kiểu chat!"),]
200
-
201
-
202
- )
203
 
204
  if __name__ == "__main__":
205
- demo.launch()
 
1
  import gradio as gr
2
+ from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
3
+ from langchain_community.tools import DuckDuckGoSearchRun
4
+ from langchain.prompts import PromptTemplate, MessagesPlaceholder
5
+ from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
6
+ from typing_extensions import TypedDict
7
+ from langchain_core.prompts import ChatPromptTemplate
8
+ import pickle
9
+ from langchain_core.messages import HumanMessage, AIMessage
 
10
  from langgraph.graph import END, StateGraph
11
+ from huggingface_hub import hf_hub_download
 
 
12
 
13
+ from langchain_community.llms import LlamaCpp
14
# DuckDuckGo search tool backing the "websearch" graph node.
wrapper = DuckDuckGoSearchAPIWrapper(max_results=5)
web_search_tool = DuckDuckGoSearchRun(api_wrapper=wrapper)

# Local llama.cpp model shared by every chain in this app.
llm = LlamaCpp(
    model_path="Llama-3.1-8B-Instruct.Q5_K_M.gguf",
    temperature=0,
    max_tokens=512,
    n_ctx=8000,
    top_p=1,
    # callback_manager=callback_manager,
    verbose=True,  # Verbose is required to pass to the callback manager
)
26
# Running conversation memory fed into generate_prompt.
# NOTE(review): module-level global, shared across all Gradio sessions —
# confirm single-user deployment is intended.
chat_history = list()

# Load prompt templates; on first run, fetch template.pkl from the HF dataset.
# Narrowed from a bare `except:` (which also swallowed KeyboardInterrupt and
# hid real unpickling errors) to the missing/truncated-file cases only.
# NOTE(review): pickle.load on downloaded data can execute arbitrary code —
# the dataset repo must be trusted.
try:
    with open("template.pkl", 'rb') as file:
        template_abox = pickle.load(file)
except (FileNotFoundError, EOFError):
    hf_hub_download(repo_id="linl03/dataAboxChat", local_dir="./", filename="template.pkl", repo_type="dataset")
    with open("./template.pkl", 'rb') as file:
        template_abox = pickle.load(file)

# Router prompt: JSON output consumed by route_question (reads output['choice']).
router_prompt = PromptTemplate(
    template=template_abox["router_template"],
    input_variables=["question"],
)
# Answer-generation chat prompt, with the running chat history spliced in.
generate_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", template_abox["system_prompt"]),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)
# Rewrites the user question into a web-search query (JSON with a "query" key).
query_prompt = PromptTemplate(
    template=template_abox["query_template"],
    input_variables=["question"],
)
# Schedule/reminder prompt — not referenced elsewhere in this file.
remind_prompt = PromptTemplate(
    template=template_abox["schedule_template"],
    input_variables=["time"],
)
58
 
59
# Runnable chains assembled from the prompts above.
question_router = router_prompt | llm | JsonOutputParser()  # route_question reads ['choice']
generate_chain = generate_prompt | llm | StrOutputParser()  # streamed answer text
query_chain = query_prompt | llm | JsonOutputParser()       # transform_query reads ['query']
63
 
 
 
 
64
 
65
class State(TypedDict):
    """Shared state flowing between the LangGraph nodes.

    question: the user question
    search_query: web-search query produced by transform_query
    context: search results consumed by the generator
    generation: declared for the answer text (not written in this file)
    """

    question: str
    generation: str
    search_query: str
    context: str
71
 
72
def generate(state):
    """Graph node: forward question/context out of the graph state.

    The actual streaming generation happens later in QA() via
    generate_chain; this terminal node only passes the fields through.
    """
    print("Step: Đang tạo câu trả lời")
    return {'question': state["question"], 'context': state["context"]}
79
 
 
 
 
 
 
80
 
81
def transform_query(state):
    """Graph node: rewrite the user question into a web-search query.

    Invokes query_chain (JSON output) and stores its "query" field in the
    graph state under "search_query".
    """
    print("Step: Tối ưu câu hỏi của người dùng")
    result = query_chain.invoke({"question": state['question']})
    print(result)
    return {"search_query": result["query"]}
89
 
90
  def web_search(state):
 
 
 
 
 
 
 
 
 
91
 
92
  search_query = state['search_query']
93
  print(f'Step: Đang tìm kiếm web cho: "{search_query}"')
 
98
  return {"context": search_result}
99
 
100
def route_question(state):
    """Graph entry router: pick the next node from the router LLM's choice.

    Returns "websearch" or "generate" — the node names registered with
    set_conditional_entry_point in Agents().
    """
    print("Step: Routing Query")
    question = state['question']
    output = question_router.invoke({"question": question})
    # Fixed mojibake in the log string ("Lựa ch???n" -> "Lựa chọn").
    print('Lựa chọn của AI là: ', output)
    if output['choice'] == "web_search":
        # print("Step: Routing Query to Web Search")
        return "websearch"
    # Default to direct generation for 'generate' or any unexpected choice;
    # the original if/elif returned None on a bad choice, which would raise
    # inside StateGraph's conditional-entry dispatch.
    # print("Step: Routing Query to Generation")
    return "generate"
112
 
113
def Agents():
    """Build and compile the question-routing graph.

    Entry: route_question decides between the web-search path
    (transform_query -> websearch -> generate) and direct generation.
    """
    graph = StateGraph(State)
    graph.add_node("websearch", web_search)
    graph.add_node("transform_query", transform_query)
    graph.add_node("generate", generate)

    # Build the edges.
    graph.set_conditional_entry_point(
        route_question,
        {"websearch": "transform_query", "generate": "generate"},
    )
    graph.add_edge("transform_query", "websearch")
    graph.add_edge("websearch", "generate")
    graph.add_edge("generate", END)

    # Compile the workflow.
    return graph.compile()
 
133
 
134
def QA(question: str, history: list):
    """Gradio ChatInterface handler (multimodal): stream an answer.

    ``question`` is a multimodal message despite the ``str`` annotation —
    the code reads ``question.text`` and ``question.files``. ``history`` is
    supplied by Gradio but unused; the model sees the module-level
    ``chat_history`` instead.
    """
    if not question.files:
        local_agent = Agents()
        gr.Info("Đang tạo câu trả lời!")
        response = ''
        output = local_agent.invoke({"question": question.text})
        context = output['context']
        questions = output['question']
        # Stream the growing answer so the UI updates token by token.
        for chunk in generate_chain.stream({"context": context, "question": questions, "chat_history": chat_history}):
            response += chunk
            print(chunk, end="", flush=True)
            yield response

        # NOTE(review): chat_history grows without bound across the session.
        chat_history.append(HumanMessage(content=question.text))
        chat_history.append(AIMessage(content=response))
    else:
        print(question.files)
        # PDF ingestion is not implemented yet:
        # all_loaders = []
        # for pdf in question.files:
        #     pdf_loader = PyPDFLoader("data/Đắc Nhân Tâm.pdf")
        # Yield an explicit notice — the original branch yielded nothing,
        # leaving the chat UI hanging with an empty response.
        yield "Chưa hỗ trợ đọc tệp đính kèm!"
156
 
157
+
158
# Multimodal chat UI; QA streams the answers back into the chat box.
democ1 = gr.ChatInterface(
    QA,
    additional_inputs=[],
    fill_height=True,
    multimodal=True,
    title="Box Chat(Agent)",
    type="tuples",
)

if __name__ == "__main__":
    democ1.launch()