jmlon committed on
Commit
de409d0
·
1 Parent(s): 15d6538

Fixing problems with Runnables

Browse files
__pycache__/app02-chatRagLcel.cpython-310.pyc CHANGED
Binary files a/__pycache__/app02-chatRagLcel.cpython-310.pyc and b/__pycache__/app02-chatRagLcel.cpython-310.pyc differ
 
app02-chatRagLcel.py CHANGED
@@ -7,10 +7,10 @@ import gradio as gr
7
  from operator import itemgetter
8
 
9
  # Langchain
10
- from langchain.chains import RetrievalQA
11
- from langchain.prompts import ChatPromptTemplate
12
- from langchain_core.runnables import RunnableParallel,RunnablePassthrough
13
  from langchain_core.output_parsers import StrOutputParser
 
14
 
15
  # HuggingFace
16
  from langchain_community.embeddings import HuggingFaceEmbeddings
@@ -111,13 +111,85 @@ def rag_query(question: str, history: list[list[str]]):
111
  response = chain.invoke({ "chat_history": chat_history, "question": question })
112
  return response.content
113
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
 
116
 
117
 
118
 
119
  gr.ChatInterface(
120
- rag_query,
121
  title="RAG Chatbot demo",
122
  description="A chatbot doing Retrieval Augmented Generation, backed by a Pinecone vector database"
123
  ).launch()
 
7
  from operator import itemgetter
8
 
9
  # Langchain
10
+ from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
11
+ from langchain_core.runnables import RunnableParallel,RunnablePassthrough,RunnableLambda
 
12
  from langchain_core.output_parsers import StrOutputParser
13
+ from langchain_core.messages import AIMessage, HumanMessage
14
 
15
  # HuggingFace
16
  from langchain_community.embeddings import HuggingFaceEmbeddings
 
111
  response = chain.invoke({ "chat_history": chat_history, "question": question })
112
  return response.content
113
 
114
+ # ----------------------------------------
115
+
116
+
117
def pipeLog(s: str, x):
    """Print a label followed by a value and hand the value back unchanged.

    Intended as a debugging tap: wrapping it in a lambda lets the value
    flowing through a pipeline be inspected without altering it.

    Args:
        s: Label printed before the value.
        x: Any value; it is printed and then returned as-is.

    Returns:
        The same object that was passed in as ``x``.
    """
    passthrough = x
    print(s, passthrough)
    return passthrough
120
+ pipe_a = RunnableLambda(lambda x: pipeLog("a:",x))
121
+ pipe_b = RunnableLambda(lambda x: pipeLog("b:",x))
122
+
123
+
124
+
125
+ contextualize_q_system_prompt = """Given a chat history and the latest user question \
126
+ which might reference context in the chat history, formulate a standalone question \
127
+ which can be understood without the chat history. Do NOT answer the question, \
128
+ just reformulate it if needed and otherwise return it as is."""
129
+
130
+ contextualize_q_prompt = ChatPromptTemplate.from_messages(
131
+ [
132
+ ("system", contextualize_q_system_prompt),
133
+ MessagesPlaceholder(variable_name="chat_history"),
134
+ ("human", "{question}"),
135
+ ]
136
+ )
137
+
138
+ contextualize_q_chain = contextualize_q_prompt | model | StrOutputParser()
139
+
140
+
141
+
142
+
143
+ qa_system_prompt = """You are an assistant for question-answering tasks.
144
+ Use the following pieces of retrieved context to answer the question.
145
+ If you don't know the answer, just say that you don't know.
146
+ Use three sentences maximum and keep the answer concise.
147
+
148
+ {context}"""
149
+ qa_prompt = ChatPromptTemplate.from_messages(
150
+ [
151
+ ("system", qa_system_prompt),
152
+ MessagesPlaceholder(variable_name="chat_history"),
153
+ ("human", "{question}"),
154
+ ]
155
+ )
156
+
157
def contextualized_question(input: dict):
    """Pick what should be used as the retrieval question.

    Args:
        input: Mapping that holds ``"question"`` and, optionally,
            ``"chat_history"``.

    Returns:
        ``input["question"]`` when ``"chat_history"`` is missing or empty;
        otherwise the ``contextualize_q_chain`` object itself.
    """
    # No prior turns: the question is already standalone.
    if not input.get("chat_history"):
        return input["question"]
    # NOTE(review): presumably the returned runnable is invoked by the
    # enclosing pipeline with the same input -- confirm against LangChain's
    # handling of functions that return runnables.
    return contextualize_q_chain
162
+
163
+
164
+ rag_chain = (
165
+ RunnablePassthrough.assign(
166
+ context=pipe_b | contextualized_question | retriever | format_docs
167
+ )
168
+ | qa_prompt
169
+ | model
170
+ )
171
+
172
+ rag_chain_with_source = RunnableParallel(
173
+ {"xx": pipe_a, "context": itemgetter('question')|retriever, "question": itemgetter('question'), "chat_history": itemgetter('chat_history') }
174
+ ).assign(answer=rag_chain)
175
+
176
+
177
+
178
def rag_query_2(question: str, history: list[list[str]]):
    """Answer a chat question using the history-aware RAG chain.

    Args:
        question: The latest user message.
        history: Earlier turns as ``[user_message, assistant_message]`` pairs
            (presumably the pair format handed over by ``gr.ChatInterface``
            -- confirm against the installed Gradio version).

    Returns:
        The ``content`` of the ``'answer'`` entry produced by
        ``rag_chain_with_source``.
    """
    # Both prompts consume "chat_history" through MessagesPlaceholder, which
    # works on message objects; raw string pairs must be converted first.
    chat_history = []
    for user_msg, ai_msg in history:
        chat_history.append(HumanMessage(content=user_msg))
        # A turn may lack an assistant reply; skip it instead of emitting an
        # AIMessage with no content.
        if ai_msg is not None:
            chat_history.append(AIMessage(content=ai_msg))

    response = rag_chain_with_source.invoke({'question': question, 'chat_history': chat_history})
    print(response)
    # sources = [ doc.metadata['source'] for doc in response['context'] ]
    # print(response, '\n', sources)
    return response['answer'].content
184
+
185
+
186
 
187
 
188
 
189
 
190
 
191
  gr.ChatInterface(
192
+ rag_query_2,
193
  title="RAG Chatbot demo",
194
  description="A chatbot doing Retrieval Augmented Generation, backed by a Pinecone vector database"
195
  ).launch()
test.ipynb CHANGED
@@ -1,8 +1,21 @@
1
  {
2
  "cells": [
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  {
4
  "cell_type": "code",
5
- "execution_count": 38,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
@@ -14,7 +27,7 @@
14
  "# Langchain\n",
15
  "from langchain.chains import RetrievalQA\n",
16
  "from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
17
- "from langchain_core.runnables import RunnableParallel,RunnablePassthrough\n",
18
  "from langchain_core.output_parsers import StrOutputParser\n",
19
  "from langchain_core.messages import AIMessage, HumanMessage\n",
20
  "\n",
@@ -31,7 +44,7 @@
31
  },
32
  {
33
  "cell_type": "code",
34
- "execution_count": 10,
35
  "metadata": {},
36
  "outputs": [],
37
  "source": [
@@ -76,7 +89,7 @@
76
  },
77
  {
78
  "cell_type": "code",
79
- "execution_count": 6,
80
  "metadata": {},
81
  "outputs": [
82
  {
@@ -112,7 +125,7 @@
112
  },
113
  {
114
  "cell_type": "code",
115
- "execution_count": 7,
116
  "metadata": {},
117
  "outputs": [
118
  {
@@ -121,7 +134,7 @@
121
  "'Que es blockchain? : Blockchain es una cadena de bloques\\nPara que se usa : Para registrar transacciones\\n'"
122
  ]
123
  },
124
- "execution_count": 7,
125
  "metadata": {},
126
  "output_type": "execute_result"
127
  }
@@ -142,7 +155,7 @@
142
  },
143
  {
144
  "cell_type": "code",
145
- "execution_count": 8,
146
  "metadata": {},
147
  "outputs": [
148
  {
@@ -155,10 +168,10 @@
155
  {
156
  "data": {
157
  "text/plain": [
158
- "AIMessage(content='The provided chat history does not contain any information about consensus.')"
159
  ]
160
  },
161
- "execution_count": 8,
162
  "metadata": {},
163
  "output_type": "execute_result"
164
  }
@@ -183,7 +196,7 @@
183
  },
184
  {
185
  "cell_type": "code",
186
- "execution_count": 11,
187
  "metadata": {},
188
  "outputs": [],
189
  "source": [
@@ -204,7 +217,7 @@
204
  },
205
  {
206
  "cell_type": "code",
207
- "execution_count": 15,
208
  "metadata": {},
209
  "outputs": [
210
  {
@@ -215,10 +228,10 @@
215
  " Document(page_content='present in a peer-to-peer network, originally deployed for the bitcoin cryptocurrency. All\\nthe nodes present on the chain maintain a complete local copy of the blockchain. The\\nblockchain is an indigenous technology that has emerged for decentralized applications\\nas the outcome of complication, privacy, and security issues present in the applications\\nover half a century [3,4]. It is a peer-to-peer system that authorizes the users to maintain a\\nledger for various transactions that are reproduced, and remains identical in more than\\none location over multiple user servers [5].', metadata={'source': 'D:\\\\Downloads\\\\Datatrust-Info\\\\Text\\\\custody\\\\CustodyBlock-2021.txt'}),\n",
216
  " Document(page_content='2. Background\\nBlockchain technology has emerged as a disruptive innovation, providing a decentralized and transparent environment across various domains. Blockchain can be understood\\nas a distributed ledger technology that enables secure and immutable record-keeping of\\ndigital transactions. It comprises a chain of blocks, each containing a list of validated and\\ntime-stamped transactions. An interesting feature of blockchain is its decentralized nature,\\nwhere multiple participants, or nodes, maintain copies of the ledger. This distributed', metadata={'source': 'D:\\\\Downloads\\\\Datatrust-Info\\\\Text\\\\custody\\\\ExploringBC-2023.txt'})],\n",
217
  " 'question': 'What is a blockchain?',\n",
218
- " 'answer': 'A blockchain is a distributed ledger technology that enables secure and immutable record-keeping of digital transactions. It comprises a chain of blocks, each containing a list of validated and time-stamped transactions.'}"
219
  ]
220
  },
221
- "execution_count": 15,
222
  "metadata": {},
223
  "output_type": "execute_result"
224
  }
@@ -230,17 +243,17 @@
230
  },
231
  {
232
  "cell_type": "code",
233
- "execution_count": 19,
234
  "metadata": {},
235
  "outputs": [
236
  {
237
  "data": {
238
  "text/plain": [
239
  "('What is a blockchain?',\n",
240
- " 'A blockchain is a distributed ledger technology that enables secure and immutable record-keeping of digital transactions. It comprises a chain of blocks, each containing a list of validated and time-stamped transactions.')"
241
  ]
242
  },
243
- "execution_count": 19,
244
  "metadata": {},
245
  "output_type": "execute_result"
246
  }
@@ -251,7 +264,7 @@
251
  },
252
  {
253
  "cell_type": "code",
254
- "execution_count": 24,
255
  "metadata": {},
256
  "outputs": [
257
  {
@@ -272,7 +285,7 @@
272
  },
273
  {
274
  "cell_type": "code",
275
- "execution_count": 25,
276
  "metadata": {},
277
  "outputs": [
278
  {
@@ -284,7 +297,7 @@
284
  " 'D:\\\\Downloads\\\\Datatrust-Info\\\\Text\\\\custody\\\\ExploringBC-2023.txt']"
285
  ]
286
  },
287
- "execution_count": 25,
288
  "metadata": {},
289
  "output_type": "execute_result"
290
  }
@@ -303,7 +316,7 @@
303
  },
304
  {
305
  "cell_type": "code",
306
- "execution_count": 28,
307
  "metadata": {},
308
  "outputs": [],
309
  "source": [
@@ -318,7 +331,7 @@
318
  },
319
  {
320
  "cell_type": "code",
321
- "execution_count": 30,
322
  "metadata": {},
323
  "outputs": [],
324
  "source": [
@@ -335,7 +348,7 @@
335
  },
336
  {
337
  "cell_type": "code",
338
- "execution_count": 31,
339
  "metadata": {},
340
  "outputs": [
341
  {
@@ -344,7 +357,7 @@
344
  "'Data governance is the process of managing the availability, usability, integrity, and security of data in enterprise systems. It ensures that data is consistent, trustworthy, and not misused. Data governance is critical as organizations face new data privacy regulations and rely more on data analytics for decision-making.'"
345
  ]
346
  },
347
- "execution_count": 31,
348
  "metadata": {},
349
  "output_type": "execute_result"
350
  }
@@ -362,16 +375,16 @@
362
  },
363
  {
364
  "cell_type": "code",
365
- "execution_count": 40,
366
  "metadata": {},
367
  "outputs": [
368
  {
369
  "data": {
370
  "text/plain": [
371
- "'Large in the context of large language models (LLMs) typically refers to the size and capacity of the model\\'s parameters and training data. It can have several implications:\\n\\n**1. Model Complexity:**\\nLLMs with a large number of parameters (e.g., billions or trillions) can capture complex relationships and patterns in the data, enabling them to handle a wide range of language-related tasks.\\n\\n**2. Training Data Volume:**\\nLLMs are trained on massive datasets of text and code, which provide them with a comprehensive understanding of language and its usage. The size of the training data contributes to the model\\'s ability to generalize and perform well on various tasks.\\n\\n**3. Computational Resources:**\\nTraining and deploying large LLMs require significant computational resources, including powerful GPUs and specialized hardware. The size of the model influences the amount of memory, processing power, and training time required.\\n\\n**4. Task Performance:**\\nIn general, larger LLMs tend to perform better on language-related tasks such as text generation, translation, question answering, and dialogue systems. However, the optimal model size for a specific task may vary depending on the complexity and requirements of the task.\\n\\n**5. Generalization Ability:**\\nLarger LLMs have the potential to generalize better to unseen data and handle a wider range of language phenomena. They can learn from a more diverse set of examples and capture more subtle patterns in the language.\\n\\nIt\\'s important to note that \"large\" is a relative term, and the specific size of an LLM that is considered \"large\" can change over time as models continue to grow in size and capability.'"
372
  ]
373
  },
374
- "execution_count": 40,
375
  "metadata": {},
376
  "output_type": "execute_result"
377
  }
@@ -412,7 +425,7 @@
412
  },
413
  {
414
  "cell_type": "code",
415
- "execution_count": 41,
416
  "metadata": {},
417
  "outputs": [],
418
  "source": [
@@ -449,7 +462,7 @@
449
  },
450
  {
451
  "cell_type": "code",
452
- "execution_count": 44,
453
  "metadata": {},
454
  "outputs": [],
455
  "source": [
@@ -466,7 +479,7 @@
466
  },
467
  {
468
  "cell_type": "code",
469
- "execution_count": 45,
470
  "metadata": {},
471
  "outputs": [
472
  {
@@ -475,10 +488,10 @@
475
  "[HumanMessage(content='What is a Blockchain?'),\n",
476
  " AIMessage(content='A blockchain is a distributed ledger technology that enables secure and immutable record-keeping of digital transactions. \\nIt comprises a chain of blocks, each containing a list of validated and time-stamped transactions. \\nMultiple participants, or nodes, maintain copies of the ledger, making it decentralized.'),\n",
477
  " HumanMessage(content='What are its benefits?'),\n",
478
- " AIMessage(content='The benefits of blockchain technology include:\\n\\n* **Transparency:** Transactions are recorded on a public ledger, allowing for visibility and verification by all participants.\\n* **Immutability:** Once a transaction is recorded on the blockchain, it cannot be altered or deleted, ensuring the integrity of the data.\\n* **Security:** Cryptographic techniques and decentralized architecture make blockchain highly resistant to fraud and cyberattacks.\\n* **Efficiency:** Automated processes and reduced intermediaries can streamline transactions, saving time and costs.\\n* **Trustless:** Blockchain eliminates the need for intermediaries, enabling trustless transactions between parties who may not know or trust each other.')]"
479
  ]
480
  },
481
- "execution_count": 45,
482
  "metadata": {},
483
  "output_type": "execute_result"
484
  }
@@ -487,6 +500,149 @@
487
  "chat_history"
488
  ]
489
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
490
  {
491
  "cell_type": "code",
492
  "execution_count": null,
 
1
  {
2
  "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "## References\n",
8
+ "[runnables - API](https://api.python.langchain.com/en/latest/core_api_reference.html#module-langchain_core.runnables) \n",
9
+ "[RunnableParallel](https://python.langchain.com/docs/expression_language/how_to/map) \n",
10
+ "[RunnablePassthrough](https://python.langchain.com/docs/expression_language/how_to/passthrough) \n",
11
+ "[]() \n",
12
+ "[]() \n",
13
+ "[]() \n"
14
+ ]
15
+ },
16
  {
17
  "cell_type": "code",
18
+ "execution_count": 35,
19
  "metadata": {},
20
  "outputs": [],
21
  "source": [
 
27
  "# Langchain\n",
28
  "from langchain.chains import RetrievalQA\n",
29
  "from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
30
+ "from langchain_core.runnables import RunnableParallel,RunnablePassthrough,RunnableLambda\n",
31
  "from langchain_core.output_parsers import StrOutputParser\n",
32
  "from langchain_core.messages import AIMessage, HumanMessage\n",
33
  "\n",
 
44
  },
45
  {
46
  "cell_type": "code",
47
+ "execution_count": 30,
48
  "metadata": {},
49
  "outputs": [],
50
  "source": [
 
89
  },
90
  {
91
  "cell_type": "code",
92
+ "execution_count": 4,
93
  "metadata": {},
94
  "outputs": [
95
  {
 
125
  },
126
  {
127
  "cell_type": "code",
128
+ "execution_count": 5,
129
  "metadata": {},
130
  "outputs": [
131
  {
 
134
  "'Que es blockchain? : Blockchain es una cadena de bloques\\nPara que se usa : Para registrar transacciones\\n'"
135
  ]
136
  },
137
+ "execution_count": 5,
138
  "metadata": {},
139
  "output_type": "execute_result"
140
  }
 
155
  },
156
  {
157
  "cell_type": "code",
158
+ "execution_count": 6,
159
  "metadata": {},
160
  "outputs": [
161
  {
 
168
  {
169
  "data": {
170
  "text/plain": [
171
+ "AIMessage(content='The provided conversation history does not contain any information about consensus.')"
172
  ]
173
  },
174
+ "execution_count": 6,
175
  "metadata": {},
176
  "output_type": "execute_result"
177
  }
 
196
  },
197
  {
198
  "cell_type": "code",
199
+ "execution_count": 7,
200
  "metadata": {},
201
  "outputs": [],
202
  "source": [
 
217
  },
218
  {
219
  "cell_type": "code",
220
+ "execution_count": 8,
221
  "metadata": {},
222
  "outputs": [
223
  {
 
228
  " Document(page_content='present in a peer-to-peer network, originally deployed for the bitcoin cryptocurrency. All\\nthe nodes present on the chain maintain a complete local copy of the blockchain. The\\nblockchain is an indigenous technology that has emerged for decentralized applications\\nas the outcome of complication, privacy, and security issues present in the applications\\nover half a century [3,4]. It is a peer-to-peer system that authorizes the users to maintain a\\nledger for various transactions that are reproduced, and remains identical in more than\\none location over multiple user servers [5].', metadata={'source': 'D:\\\\Downloads\\\\Datatrust-Info\\\\Text\\\\custody\\\\CustodyBlock-2021.txt'}),\n",
229
  " Document(page_content='2. Background\\nBlockchain technology has emerged as a disruptive innovation, providing a decentralized and transparent environment across various domains. Blockchain can be understood\\nas a distributed ledger technology that enables secure and immutable record-keeping of\\ndigital transactions. It comprises a chain of blocks, each containing a list of validated and\\ntime-stamped transactions. An interesting feature of blockchain is its decentralized nature,\\nwhere multiple participants, or nodes, maintain copies of the ledger. This distributed', metadata={'source': 'D:\\\\Downloads\\\\Datatrust-Info\\\\Text\\\\custody\\\\ExploringBC-2023.txt'})],\n",
230
  " 'question': 'What is a blockchain?',\n",
231
+ " 'answer': 'A blockchain is a distributed ledger technology that enables secure and immutable record-keeping of digital transactions. It comprises a chain of blocks, each containing a list of validated and time-stamped transactions. An interesting feature of blockchain is its decentralized nature, where multiple participants, or nodes, maintain copies of the ledger.'}"
232
  ]
233
  },
234
+ "execution_count": 8,
235
  "metadata": {},
236
  "output_type": "execute_result"
237
  }
 
243
  },
244
  {
245
  "cell_type": "code",
246
+ "execution_count": 9,
247
  "metadata": {},
248
  "outputs": [
249
  {
250
  "data": {
251
  "text/plain": [
252
  "('What is a blockchain?',\n",
253
+ " 'A blockchain is a distributed ledger technology that enables secure and immutable record-keeping of digital transactions. It comprises a chain of blocks, each containing a list of validated and time-stamped transactions. An interesting feature of blockchain is its decentralized nature, where multiple participants, or nodes, maintain copies of the ledger.')"
254
  ]
255
  },
256
+ "execution_count": 9,
257
  "metadata": {},
258
  "output_type": "execute_result"
259
  }
 
264
  },
265
  {
266
  "cell_type": "code",
267
+ "execution_count": 10,
268
  "metadata": {},
269
  "outputs": [
270
  {
 
285
  },
286
  {
287
  "cell_type": "code",
288
+ "execution_count": 11,
289
  "metadata": {},
290
  "outputs": [
291
  {
 
297
  " 'D:\\\\Downloads\\\\Datatrust-Info\\\\Text\\\\custody\\\\ExploringBC-2023.txt']"
298
  ]
299
  },
300
+ "execution_count": 11,
301
  "metadata": {},
302
  "output_type": "execute_result"
303
  }
 
316
  },
317
  {
318
  "cell_type": "code",
319
+ "execution_count": 12,
320
  "metadata": {},
321
  "outputs": [],
322
  "source": [
 
331
  },
332
  {
333
  "cell_type": "code",
334
+ "execution_count": 13,
335
  "metadata": {},
336
  "outputs": [],
337
  "source": [
 
348
  },
349
  {
350
  "cell_type": "code",
351
+ "execution_count": 14,
352
  "metadata": {},
353
  "outputs": [
354
  {
 
357
  "'Data governance is the process of managing the availability, usability, integrity, and security of data in enterprise systems. It ensures that data is consistent, trustworthy, and not misused. Data governance is critical as organizations face new data privacy regulations and rely more on data analytics for decision-making.'"
358
  ]
359
  },
360
+ "execution_count": 14,
361
  "metadata": {},
362
  "output_type": "execute_result"
363
  }
 
375
  },
376
  {
377
  "cell_type": "code",
378
+ "execution_count": 15,
379
  "metadata": {},
380
  "outputs": [
381
  {
382
  "data": {
383
  "text/plain": [
384
+ "'Large in the context of large language models (LLMs) typically refers to the size and capacity of the model\\'s parameters and training data. It can have several implications:\\n\\n**1. Model Complexity:**\\nLLMs with a large number of parameters (e.g., billions or trillions) can capture complex relationships and patterns in the data, enabling them to perform a wide range of language-related tasks.\\n\\n**2. Data Volume:**\\nLLMs are trained on vast datasets of text and code, which provides them with a comprehensive understanding of language and its usage. The size of the training data contributes to the model\\'s ability to generalize and handle diverse language inputs.\\n\\n**3. Computational Resources:**\\nTraining and deploying LLMs require significant computational resources due to their large size. This includes specialized hardware (e.g., GPUs) and distributed computing systems to handle the massive datasets and complex model architectures.\\n\\n**4. Performance:**\\nIn general, larger LLMs tend to perform better on language-related tasks compared to smaller models. They can achieve higher accuracy, handle longer sequences, and generate more coherent and informative text.\\n\\n**5. Applications:**\\nThe large size of LLMs enables them to be used in a wide range of applications, including natural language processing (NLP), machine translation, text summarization, question answering, dialogue systems, and code generation.\\n\\nIt\\'s important to note that the definition of \"large\" in LLMs can vary depending on the specific model and the context in which it is used.'"
385
  ]
386
  },
387
+ "execution_count": 15,
388
  "metadata": {},
389
  "output_type": "execute_result"
390
  }
 
425
  },
426
  {
427
  "cell_type": "code",
428
+ "execution_count": 16,
429
  "metadata": {},
430
  "outputs": [],
431
  "source": [
 
462
  },
463
  {
464
  "cell_type": "code",
465
+ "execution_count": 17,
466
  "metadata": {},
467
  "outputs": [],
468
  "source": [
 
479
  },
480
  {
481
  "cell_type": "code",
482
+ "execution_count": 18,
483
  "metadata": {},
484
  "outputs": [
485
  {
 
488
  "[HumanMessage(content='What is a Blockchain?'),\n",
489
  " AIMessage(content='A blockchain is a distributed ledger technology that enables secure and immutable record-keeping of digital transactions. \\nIt comprises a chain of blocks, each containing a list of validated and time-stamped transactions. \\nMultiple participants, or nodes, maintain copies of the ledger, making it decentralized.'),\n",
490
  " HumanMessage(content='What are its benefits?'),\n",
491
+ " AIMessage(content='The benefits of blockchain technology include:\\n\\n* **Transparency:** Transactions are recorded on a public ledger, allowing for visibility and verification by all participants.\\n* **Immutability:** Once a transaction is recorded on the blockchain, it cannot be altered or deleted, ensuring the integrity of the data.\\n* **Security:** Cryptographic techniques and decentralized architecture make blockchain highly resistant to hacking and fraud.\\n* **Efficiency:** Automated processes and reduced intermediaries can streamline transactions and lower costs.\\n* **Traceability:** The chronological and tamper-proof nature of blockchain provides a clear audit trail for transactions.')]"
492
  ]
493
  },
494
+ "execution_count": 18,
495
  "metadata": {},
496
  "output_type": "execute_result"
497
  }
 
500
  "chat_history"
501
  ]
502
  },
503
+ {
504
+ "cell_type": "markdown",
505
+ "metadata": {},
506
+ "source": [
507
+ "## Combining doc retrieval and chat_history"
508
+ ]
509
+ },
510
+ {
511
+ "cell_type": "code",
512
+ "execution_count": 47,
513
+ "metadata": {},
514
+ "outputs": [
515
+ {
516
+ "name": "stdout",
517
+ "output_type": "stream",
518
+ "text": [
519
+ "a {'x': 'hola'}\n"
520
+ ]
521
+ },
522
+ {
523
+ "data": {
524
+ "text/plain": [
525
+ "{'x': 'hola'}"
526
+ ]
527
+ },
528
+ "execution_count": 47,
529
+ "metadata": {},
530
+ "output_type": "execute_result"
531
+ }
532
+ ],
533
+ "source": [
534
+ "def pipeLog(s:str, x):\n",
535
+ " print(s, x)\n",
536
+ " return x\n",
537
+ "\n",
538
+ "pipe_a = RunnableLambda(lambda x: pipeLog(\"a\",x))\n",
539
+ "\n",
540
+ "pipe_a.invoke({'x':\"hola\"})"
541
+ ]
542
+ },
543
+ {
544
+ "cell_type": "code",
545
+ "execution_count": 45,
546
+ "metadata": {},
547
+ "outputs": [],
548
+ "source": [
549
+ "rag_chain_with_source = RunnableParallel(\n",
550
+ " {\"context\": itemgetter(\"question\")|retriever, \"question\": itemgetter(\"question\"), \"chat_history\": itemgetter(\"chat_history\") }\n",
551
+ ").assign(answer=rag_chain)\n"
552
+ ]
553
+ },
554
+ {
555
+ "cell_type": "code",
556
+ "execution_count": 46,
557
+ "metadata": {},
558
+ "outputs": [
559
+ {
560
+ "data": {
561
+ "text/plain": [
562
+ "{'context': [Document(page_content='ledger for various transactions that are reproduced, and remains identical in more than\\none location over multiple user servers [5].\\nA blockchain is essentially a block of chains, with the growing list of records referred\\nto as blocks that are joined with cryptography [4]. Each blockchain contains a hash of a\\nprevious block, and a timestamp that keeps track of the creation and modification time of', metadata={'source': 'D:\\\\Downloads\\\\Datatrust-Info\\\\Text\\\\custody\\\\CustodyBlock-2021.txt'}),\n",
563
+ " Document(page_content='Such a system must meet several requirements, including data integrity,\\nchain of custody, auditing capabilities, and evidence preservation.\\nBlockchain technology offers a potential solution to these challenges by\\nenabling the verification of the legality and authenticity of methods used\\nfor digital evidence gathering, storage, and transfer. A blockchain is\\nessentially a series of linked data structures known as blocks, which can\\nbe used to store and monitor the status of distributed systems on a\\npeer-to-peer network. Each block is connected to a previous block called', metadata={'source': 'D:\\\\Downloads\\\\Datatrust-Info\\\\Text\\\\custody\\\\BlockchainBased-2023.txt'}),\n",
564
+ " Document(page_content='present in a peer-to-peer network, originally deployed for the bitcoin cryptocurrency. All\\nthe nodes present on the chain maintain a complete local copy of the blockchain. The\\nblockchain is an indigenous technology that has emerged for decentralized applications\\nas the outcome of complication, privacy, and security issues present in the applications\\nover half a century [3,4]. It is a peer-to-peer system that authorizes the users to maintain a\\nledger for various transactions that are reproduced, and remains identical in more than\\none location over multiple user servers [5].', metadata={'source': 'D:\\\\Downloads\\\\Datatrust-Info\\\\Text\\\\custody\\\\CustodyBlock-2021.txt'}),\n",
565
+ " Document(page_content='2. Background\\nBlockchain technology has emerged as a disruptive innovation, providing a decentralized and transparent environment across various domains. Blockchain can be understood\\nas a distributed ledger technology that enables secure and immutable record-keeping of\\ndigital transactions. It comprises a chain of blocks, each containing a list of validated and\\ntime-stamped transactions. An interesting feature of blockchain is its decentralized nature,\\nwhere multiple participants, or nodes, maintain copies of the ledger. This distributed', metadata={'source': 'D:\\\\Downloads\\\\Datatrust-Info\\\\Text\\\\custody\\\\ExploringBC-2023.txt'})],\n",
566
+ " 'question': 'What is a Blockchain?',\n",
567
+ " 'chat_history': [],\n",
568
+ " 'answer': AIMessage(content='A blockchain is a distributed ledger technology that enables secure and immutable record-keeping of digital transactions. \\nIt comprises a chain of blocks, each containing a list of validated and time-stamped transactions. \\nMultiple participants, or nodes, maintain copies of the ledger, making it decentralized.')}"
569
+ ]
570
+ },
571
+ "execution_count": 46,
572
+ "metadata": {},
573
+ "output_type": "execute_result"
574
+ }
575
+ ],
576
+ "source": [
577
+ "chat_history = []\n",
578
+ "\n",
579
+ "question = \"What is a Blockchain?\"\n",
580
+ "ai_msg = rag_chain_with_source.invoke({\"question\": question, \"chat_history\": chat_history})\n",
581
+ "ai_msg\n"
582
+ ]
583
+ },
584
+ {
585
+ "cell_type": "code",
586
+ "execution_count": 40,
587
+ "metadata": {},
588
+ "outputs": [
589
+ {
590
+ "name": "stdout",
591
+ "output_type": "stream",
592
+ "text": [
593
+ "{'question': 'Is this a question?', 'chat_history': []}\n"
594
+ ]
595
+ },
596
+ {
597
+ "data": {
598
+ "text/plain": [
599
+ "{'input': {'question': 'Is this a question?', 'chat_history': []},\n",
600
+ " 'context': [Document(page_content='▪\\n\\n▪', metadata={'source': 'D:\\\\Downloads\\\\Datatrust-Info\\\\Text\\\\technology\\\\Gaia_X_Compliance_Document_Final_f.txt'}),\n",
601
+ " Document(page_content='1/10\\n\\n\\x0c11/3/23, 2:12 PM\\n\\nWe need data trusts to help manage our data | MIT Technology Review\\n\\nFebruary 24, 2021\\n\\nDo you simply click “Yes”\\nwhenever a company asks for your\\ndata? If so, you’re not alone. We\\ncan’t be expected to read the\\nlengthy terms and conditions or\\nevaluate all the risks every time we\\nuse a service. That’s like asking\\neach of us to assess whether the\\nwater we drink is safe every time\\nwe take a sip. So we hit “Yes” and\\nhope for the best.\\nEven if you’ve done your research,\\nthough, your decision could affect\\nother people in ways you didn’t\\naccount for. When you share your', metadata={'source': 'D:\\\\Downloads\\\\Datatrust-Info\\\\Text\\\\dataTrust\\\\We need data trusts to help manage our data _ MIT Technology Review.txt'}),\n",
602
+ " Document(page_content='𝐶 𝐴𝑠𝑡𝑎𝑡𝑢𝑠 =', metadata={'source': 'D:\\\\Downloads\\\\Datatrust-Info\\\\Text\\\\technology\\\\1_s20_S2667096822000180_main.txt'}),\n",
603
+ " Document(page_content='(10)', metadata={'source': 'D:\\\\Downloads\\\\Datatrust-Info\\\\Text\\\\technology\\\\A_Data_Sharing_Protocol_to_Minimize_Security_and_Privacy_Risks_of_Cloud_Storage_in_Big_Data_Era.txt'})],\n",
604
+ " 'question': 'Is this a question?'}"
605
+ ]
606
+ },
607
+ "execution_count": 40,
608
+ "metadata": {},
609
+ "output_type": "execute_result"
610
+ }
611
+ ],
612
+ "source": [
613
+ "rp = RunnableParallel(\n",
614
+ " {\"input\": pipe, \"context\": itemgetter(\"question\")|retriever, \"question\": itemgetter(\"question\") }\n",
615
+ ")\n",
616
+ "rp.invoke({ 'question': \"Is this a question?\", \"chat_history\": [] })"
617
+ ]
618
+ },
619
+ {
620
+ "cell_type": "code",
621
+ "execution_count": 42,
622
+ "metadata": {},
623
+ "outputs": [
624
+ {
625
+ "data": {
626
+ "text/plain": [
627
+ "{'context': [Document(page_content='GAIA-X, estamos hablando de la ejecución de un Servicio en los Nodos. Cada Servicio puede\\nutilizar un solo Nodo o ser ejecutado en varios. Las instancias de servicio pueden consumir\\notras instancias de servicio de las que dependen.\\n•', metadata={'source': 'D:\\\\Downloads\\\\Datatrust-Info\\\\Text\\\\dataTrust\\\\Interplataformas21_WhitePaper_DataSpaces.txt'}),\n",
628
+ " Document(page_content='el segundo semestre del año. Más a largo plazo, los escenarios de la OMT indican que el turismo\\ninternacional podría tardar entre dos años y medio y cuatro en volver a los niveles de 2019.\\nEn este contexto, la pandemia ha acelerado la necesidad de hacer llegar la transformación digital tanto\\nde empresas turísticas como de los destinos de nuestro país. La digitalización será el pilar bajo el cual\\nse pueda mejorar la experiencia del turista en el destino, impulsar la competitividad de España a nivel', metadata={'source': 'D:\\\\Downloads\\\\Datatrust-Info\\\\Text\\\\dataTrust\\\\Interplataformas21_WhitePaper_DataSpaces.txt'}),\n",
629
+ " Document(page_content='Tipo\\n\\nPersonal\\n\\nLugar\\n\\nOrganización', metadata={'source': 'D:\\\\Downloads\\\\Datatrust-Info\\\\Text\\\\riskManagement\\\\articles_237907_maestro_mspi.txt'}),\n",
630
+ " Document(page_content='ocurra un cuello de botella, en el cual, la cantidad de personas que intentan acceder a un servidor sea\\nmás grande de lo que este puede soportar, causando tiempos de espera y ralentizando el sistema.\\nEn una red distribuida la extracción de cualquiera de los nodos no desconectaría de la red a ningún\\notro. Todos los nodos se conectan entre sí, sin que tengan que pasar necesariamente por uno o varios\\ncentros locales. En este tipo de redes desaparece la división centro/periferia y por tanto el poder de\\nfiltro sobre la información que fluye por ella, lo que lo hace un sistema práctico y eficiente.', metadata={'source': 'D:\\\\Downloads\\\\Datatrust-Info\\\\Text\\\\blockchain\\\\articles_179085_recurso_3.txt'})],\n",
631
+ " 'question': 'hola mundo'}"
632
+ ]
633
+ },
634
+ "execution_count": 42,
635
+ "metadata": {},
636
+ "output_type": "execute_result"
637
+ }
638
+ ],
639
+ "source": [
640
+ "rag_chain_with_source = RunnableParallel(\n",
641
+ " {\"context\": retriever, \"question\": RunnablePassthrough() }\n",
642
+ ")\n",
643
+ "rag_chain_with_source.invoke(\"hola mundo\")"
644
+ ]
645
+ },
646
  {
647
  "cell_type": "code",
648
  "execution_count": null,