Manual rebuild of the chain
- notebooks/transcript_rag.ipynb +44 -62
- pstuts_rag/pstuts_rag/graph.py +38 -66
- pstuts_rag/pstuts_rag/state.py +71 -15
- pyproject.toml +1 -1
notebooks/transcript_rag.ipynb
CHANGED
@@ -59,7 +59,7 @@
    },
    {
     "cell_type": "code",
-    "execution_count":
+    "execution_count": 5,
     "metadata": {},
     "outputs": [
      {
@@ -86,7 +86,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2025-05-31
+      "2025-05-31 01:01:57,165 - INFO - <module> - Loaded .env file\n"
      ]
     }
    ],
@@ -103,23 +103,17 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2025-05-31
-      "2025-05-31
-      "2025-05-31
-      "2025-05-31
-      "2025-05-31
-      "2025-05-31
-      "2025-05-31
-      "2025-05-31
-
-
-
-      "name": "stderr",
-      "output_type": "stream",
-      "text": [
-      "2025-05-31 00:25:18,104 - INFO - print - max_research_loops: 2\n",
-      "2025-05-31 00:25:18,105 - INFO - print - llm_tool_model: qwen3:4b\n",
-      "2025-05-31 00:25:18,106 - INFO - print - n_context_docs: 3\n"
+      "2025-05-31 01:01:59,036 - INFO - print - Configuration parameters:\n",
+      "2025-05-31 01:01:59,037 - INFO - print - eva_workflow_name: EVA_workflow\n",
+      "2025-05-31 01:01:59,038 - INFO - print - eva_log_level: INFO\n",
+      "2025-05-31 01:01:59,039 - INFO - print - transcript_glob: ./data/test.json\n",
+      "2025-05-31 01:01:59,040 - INFO - print - embedding_model: text-embedding-3-small\n",
+      "2025-05-31 01:01:59,042 - INFO - print - eva_strip_think: True\n",
+      "2025-05-31 01:01:59,042 - INFO - print - embedding_api: ModelAPI.OPENAI\n",
+      "2025-05-31 01:01:59,042 - INFO - print - llm_api: ModelAPI.OPENAI\n",
+      "2025-05-31 01:01:59,043 - INFO - print - max_research_loops: 2\n",
+      "2025-05-31 01:01:59,043 - INFO - print - llm_tool_model: gpt-4.1-mini\n",
+      "2025-05-31 01:01:59,044 - INFO - print - n_context_docs: 3\n"
      ]
     }
    ],
@@ -136,8 +130,8 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2025-05-31
-      "2025-05-31
+      "2025-05-31 01:01:59,776 - INFO - _send_single_request - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "2025-05-31 01:02:00,223 - INFO - _send_single_request - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
      ]
     },
     {
@@ -166,22 +160,22 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2025-05-31
-      "2025-05-31
-      "2025-05-31
-      "2025-05-31
-      "2025-05-31
-      "2025-05-31
-      "2025-05-31
-      "2025-05-31
-      "2025-05-31
-      "2025-05-31
-      "2025-05-31
-      "2025-05-31
-      "2025-05-31
-      "2025-05-31
-      "2025-05-31
-      "2025-05-31
+      "2025-05-31 01:02:00,707 - INFO - _send_single_request - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "2025-05-31 01:02:00,717 - INFO - _send_single_request - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "2025-05-31 01:02:00,786 - INFO - _send_single_request - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "2025-05-31 01:02:00,830 - INFO - _send_single_request - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "2025-05-31 01:02:01,060 - INFO - _send_single_request - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "2025-05-31 01:02:01,092 - INFO - _send_single_request - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "2025-05-31 01:02:01,234 - INFO - _send_single_request - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "2025-05-31 01:02:01,452 - INFO - _send_single_request - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "2025-05-31 01:02:01,543 - INFO - _send_single_request - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "2025-05-31 01:02:01,773 - INFO - _send_single_request - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "2025-05-31 01:02:02,459 - INFO - _send_single_request - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "2025-05-31 01:02:02,951 - INFO - _send_single_request - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "2025-05-31 01:02:03,020 - INFO - _send_single_request - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "2025-05-31 01:02:03,075 - INFO - _send_single_request - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "2025-05-31 01:02:03,301 - INFO - _send_single_request - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "2025-05-31 01:02:05,029 - INFO - _send_single_request - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
      ]
     },
     {
@@ -217,8 +211,8 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2025-05-31
-      "2025-05-31
+      "2025-05-31 01:02:05,322 - INFO - _send_single_request - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "2025-05-31 01:02:08,523 - INFO - _send_single_request - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
      ]
     }
    ],
@@ -237,19 +231,7 @@
      "text": [
      "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
      "\n",
-      "
-      "\n",
-      "A **layer** in Photoshop is a separate component of an image, acting like a \"flat pint of glass\" stacked on top of others. Each layer holds its own content (like a photo or artwork) and can be hidden or shown using the **Eye icon** in the Layers panel. \n",
-      "\n",
-      "Key points from the video: \n",
-      "- Layers are the building blocks of any image in Photoshop. \n",
-      "- They allow you to edit parts of an image independently without affecting other parts. \n",
-      "- Transparent areas in a layer let you see through to layers below (e.g., a checkerboard pattern in the example). \n",
-      "- The **Layers panel** lets you manage and select individual layers for editing. \n",
-      "\n",
-      "Timestamp: **00:00:00–00:00:23** (first explanation). \n",
-      "\n",
-      "This is covered in the \"Understand layers\" video. 🎨\n",
+      "A layer in Photoshop CC is like a separate flat pane of glass stacked on top of others. Each layer contains separate pieces of content that you can work on independently. Layers may have transparent areas that let you see through to layers below. The biggest benefit of using layers is being able to edit parts of an image independently without affecting the rest of the image (see 0:47–3:41 and 85:45–87:10). 🎨🖼️\n",
      "**REFERENCES**\n",
      "[\n",
      " {\n",
@@ -261,14 +243,14 @@
      " {\n",
      "  \"title\": \"Understand layers\",\n",
      "  \"source\": \"https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4\",\n",
-      "  \"start\":
-      "  \"stop\":
+      "  \"start\": 85.75,\n",
+      "  \"stop\": 152.97\n",
      " },\n",
      " {\n",
-      "  \"title\": \"
-      "  \"source\": \"https://
-      "  \"start\":
-      "  \"stop\":
+      "  \"title\": \"Get organized with layer groups\",\n",
+      "  \"source\": \"https://videos-tv.adobe.com/2013-07-23/f65b5a0ef188ba5e5a96df93a8ead3cf.mp4\",\n",
+      "  \"start\": 181.35,\n",
+      "  \"stop\": 239.72\n",
      " }\n",
      "]\n"
      ]
@@ -286,9 +268,9 @@
     {
      "data": {
       "text/plain": [
-       "[Document(metadata={'video_id': 19172, 'title': 'Understand layers', 'desc': 'Learn what layers are and why they are so useful.', 'length': '00:04:44.75', 'group': 'data/test.json', 'source': 'https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4', 'speech_start_stop_times': [[0.47, 3.41], [3.81, 9.13], [9.309999, 15.01], [15.299999, 20.57], [20.88, 23.3], [23.83, 27.93], [29.38, 32.79], [32.96, 33.92], [34.43, 40.21], [41.91, 45.37], [45.88, 49.01], [49.54, 55.130001], [55.72, 58.49], [58.72, 62.14]], 'start': 0.47, 'stop': 62.14, '_id': 21, '_collection_name': '
-       " Document(metadata={'video_id': 19172, 'title': 'Understand layers', 'desc': 'Learn what layers are and why they are so useful.', 'length': '00:04:44.75', 'group': 'data/test.json', 'source': 'https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4', 'speech_start_stop_times': [[
-       " Document(metadata={'video_id':
+       "[Document(metadata={'video_id': 19172, 'title': 'Understand layers', 'desc': 'Learn what layers are and why they are so useful.', 'length': '00:04:44.75', 'group': 'data/test.json', 'source': 'https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4', 'speech_start_stop_times': [[0.47, 3.41], [3.81, 9.13], [9.309999, 15.01], [15.299999, 20.57], [20.88, 23.3], [23.83, 27.93], [29.38, 32.79], [32.96, 33.92], [34.43, 40.21], [41.91, 45.37], [45.88, 49.01], [49.54, 55.130001], [55.72, 58.49], [58.72, 62.14]], 'start': 0.47, 'stop': 62.14, '_id': 21, '_collection_name': 'a19921d4-b5da-4707-bf34-1b1e3828d581'}, page_content=\"Layers are the building blocks of any image in Photoshop CC. So, it's important to understand, what layers are and why to use them - which we'll cover in this video. If you're following along, open this layered image from the downloadable practice files for this tutorial. You might think of layers like separate flat pints of glass, stacked one on top of the other. Each layer contains separate pieces of content. To get a sense of how layers are constructed, let's take a look at this Layers panel. I've closed my other panels, so that we can focus on the Layers panel. But you can skip that. By the way: If your Layers panel isn't showing, go up to the Window menu and choose Layers from there. The Layers panel is where you go to select and work with layers. In this image there are 4 layers, each with separate content. If you click the Eye icon to the left of a layer, you can toggle the visibility of that layer off and on. So, I'm going to turn off the visibility of the tailor layer. And keep your eye on the image, so you can see what's on that layer.\"),\n",
+       " Document(metadata={'video_id': 19172, 'title': 'Understand layers', 'desc': 'Learn what layers are and why they are so useful.', 'length': '00:04:44.75', 'group': 'data/test.json', 'source': 'https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4', 'speech_start_stop_times': [[85.75, 88.659999], [89.42, 100.11], [101.469999, 108.64], [109.09, 117.459999], [117.75, 129.45], [129.97, 133.37], [133.73, 143.98], [144.76, 152.97]], 'start': 85.75, 'stop': 152.97, '_id': 23, '_collection_name': 'a19921d4-b5da-4707-bf34-1b1e3828d581'}, page_content=\"Now let's take a look at just one layer, the tailor layer. A quick way to turn off all the layers except the tailor layer, is to hold down the Option key on the Mac, or the ALT key on the PC, and click on the Eye icon to the left of the tailor layer. In the Document window, you can see that this layer contains just the one small photo surrounded by a gray and white checkerboard pattern. That pattern represents transparent pixels, which allow us to see down through the corresponding part of this layer to the content of the layers below. So, let's turn that content back on by going back to the Layers panel, again holding the Option key on the Mac or the ALT key on the PC and clicking on the Eye icon to the left of the tailor layer. And all the other layers and their Eye icons come back into view. So again: You might think of layers like a stack of pints of glass, each with its own artwork and in some cases transparent areas that let you see down through to the layers below. The biggest benefit of having items on separate layers like this, is that you'll be able to edit pieces of an image independently without affecting the rest of the image.\"),\n",
+       " Document(metadata={'video_id': 4157, 'title': 'Get organized with layer groups', 'desc': 'Learn some great tips for working with layers.', 'length': '00:04:05.78', 'group': 'data/test.json', 'source': 'https://videos-tv.adobe.com/2013-07-23/f65b5a0ef188ba5e5a96df93a8ead3cf.mp4', 'speech_start_stop_times': [[181.35, 182.05], [183.16, 184.01], [184.44, 186.49], [186.62, 187.46], [187.97, 191.469999], [191.79, 194.43], [195.0, 203.05], [203.13, 205.93], [206.37, 209.55], [210.09, 212.449999], [212.46, 215.060001], [215.609999, 219.54], [220.02, 223.97], [225.37, 227.109999], [227.929999, 228.57], [229.07, 239.72]], 'start': 181.35, 'stop': 239.72, '_id': 3, '_collection_name': 'a19921d4-b5da-4707-bf34-1b1e3828d581'}, page_content='Click \"OK\". You did it again. Let\\'s talk about one more way to do this. Using searching. Let\\'s say we have a complicated document that has a lot of stuff in it. We have a lot of type or text layers. That\\'s a unique type of layer in the Layers panel, which got a bunch of them you\\'ve got a couple of hundred layers and these are scattered all over the place. And you want all of your type layers in one group. So we go to the Search facility here and click \"Kind\". And then click this button for just type layers. It isolates everything but those two. We select them with a \"Shift\" click, go up here, this is the easiest way to do it. Go into \"New Group from Layers\" and call it \"Text\" - type whatever you want. And then turn this back off again - here. And there you go. Creating groups gives you control, it reduces the chaos and clutter in the Layers panel and if at any time you change your mind, it\\'s a very easy thing to take them back out again.')]"
      ]
     },
     "execution_count": 13,
@@ -308,7 +290,7 @@
     {
      "data": {
       "text/plain": [
-       "Configuration(eva_workflow_name='EVA_workflow', eva_log_level='INFO', transcript_glob='./data/test.json', embedding_model='
+       "Configuration(eva_workflow_name='EVA_workflow', eva_log_level='INFO', transcript_glob='./data/test.json', embedding_model='text-embedding-3-small', eva_strip_think=True, embedding_api=<ModelAPI.OPENAI: 'OPENAI'>, llm_api=<ModelAPI.OPENAI: 'OPENAI'>, max_research_loops=2, llm_tool_model='gpt-4.1-mini', n_context_docs=3)"
      ]
     },
     "execution_count": 14,
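The rerun above swaps the notebook from a local qwen3:4b model to OpenAI's gpt-4.1-mini purely through Configuration (llm_api, embedding_api, llm_tool_model). The selection goes through ChatAPISelector from pstuts_rag.utils, which this diff uses but does not show; below is a minimal sketch of what such a selector could look like. The ModelAPI enum values and the ChatOllama pairing are assumptions, not code from this repo:

    from enum import Enum

    from langchain_ollama import ChatOllama  # assumed backend for local models such as qwen3:4b
    from langchain_openai import ChatOpenAI


    class ModelAPI(str, Enum):
        OPENAI = "OPENAI"
        OLLAMA = "OLLAMA"


    # Hypothetical stand-in for pstuts_rag.utils.ChatAPISelector: a plain mapping
    # from the configured API to a chat-model class, queried with .get(...) the
    # same way graph.py does below.
    ChatAPISelector = {
        ModelAPI.OPENAI: ChatOpenAI,
        ModelAPI.OLLAMA: ChatOllama,
    }

    cls = ChatAPISelector.get(ModelAPI.OPENAI, ChatOpenAI)  # falls back to OpenAI
    llm = cls(model="gpt-4.1-mini")  # llm_tool_model from the new notebook run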
pstuts_rag/pstuts_rag/graph.py
CHANGED
@@ -8,7 +8,8 @@ from langchain_core.messages import AIMessage
 from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
 from langchain_core.runnables import Runnable, RunnableLambda
 
-from
+from langchain_openai import ChatOpenAI
+from langgraph.graph import StateGraph, START, END
 
 from pstuts_rag.prompts import SUPERVISOR_SYSTEM, TAVILY_SYSTEM
 from pstuts_rag.state import PsTutsTeamState
@@ -21,6 +22,7 @@ import logging
 from typing import Callable, Dict, Tuple, Optional, Union
 
 from langchain_huggingface import HuggingFaceEmbeddings
+from pstuts_rag.utils import ChatAPISelector
 
 from app import (
     ADOBEHELP,
@@ -116,8 +118,7 @@ def create_agent(
 
 
 def create_tavily_node(
-
-) -> Tuple[Callable, AgentExecutor, TavilySearchResults]:
+    name: str = "AdobeHelp", config: Configuration = Configuration()) -> Callable:
     """Initialize tool, agent, and node for Tavily search of helpx.adobe.com.
 
     This function sets up a search agent that can query Adobe Photoshop help topics
@@ -133,6 +134,10 @@
         - The configured agent executor
         - The Tavily search tool instance
     """
+
+    cls = ChatAPISelector.get(config.llm_api, ChatOpenAI)
+    llm = cls(model=config.llm_tool_model)
+
 
     adobe_help_search = TavilySearchResults(
         max_results=5, include_domains=["helpx.adobe.com"]
@@ -147,7 +152,11 @@
     return adobe_help_node
 
 
-def create_team_supervisor(
+def create_team_supervisor(
+    system_prompt,
+    members,
+    config: Configuration = Configuration(),
+):
     """An LLM-based router."""
     options = ["FINISH"] + members
     function_def = {
@@ -167,6 +176,10 @@ def create_team_supervisor(llm: BaseChatModel, system_prompt, members):
            "required": ["next"],
        },
    }
+
+    cls = ChatAPISelector.get(config.llm_api, ChatOpenAI)
+    llm = cls(model=config.llm_tool_model)
+
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
@@ -176,60 +189,27 @@ def create_team_supervisor(llm: BaseChatModel, system_prompt, members):
    ).partial(options=str(options), team_members=", ".join(members))
    return (
        prompt
-        | llm.
+        | llm.bind_tools(
+            tools=[function_def],
+            tool_choice={"type": "function", "function": {"name": "route"}},
+        )
        | JsonOutputFunctionsParser()
    )
 
 
-
-    config=Configuration(), on_loading_complete: Optional[Callable] = None
-):
-    """
-    Initialize the application with optional loading completion callback.
-
-    Args:
-        config: Configuration object with application settings
-        on_loading_complete: Optional callback (sync or async) to call when
-            datastore loading completes
-
-    Returns:
-        DatastoreManager: The initialized datastore manager
-    """
-
-    ### PROCESS THE CONFIGURATION
-    log_level = getattr(logging, config.eva_log_level, logging.INFO)
-    logging.basicConfig(level=log_level, format="%(levelname)s: %(message)s")
-
-    ### CREATE THE DATABASE
+def initialize_datastore(callback: Optional[Callable] = None):
 
-    datastore = DatastoreManager(
-
-
+    datastore = DatastoreManager()
+    if callback:
+        datastore.add_completion_callback(callback)
+    asyncio.create_task(
+        datastore.from_json_globs(Configuration().transcript_glob)
    )
 
-    ### START DATABASE POPULATION
-
-    globs = [str(g) for g in config.transcript_glob.split(":")]
-
-    # Add custom callback if provided, otherwise use default logging
-    if on_loading_complete:
-        datastore.add_completion_callback(on_loading_complete)
-    else:
-        # Default callback for logging
-        def default_logging_callback():
-            logging.info("🎉 Datastore loading completed!")
-
-        datastore.add_completion_callback(default_logging_callback)
-
-    asyncio.create_task(datastore.from_json_globs(globs))
-
-    ### CREATE THE RAG CHAIN
-    ai_graph = StateGraph(PsTutsTeamState, config_schema=Configuration)
-
    return datastore
 
 
-async def build_the_graph(
+async def build_the_graph(datastore: DatastoreManager, config: Configuration = Configuration()):
    """
    Builds the agent graph for routing user queries.
 
@@ -239,33 +219,24 @@ async def build_the_graph(current_state: ApplicationState):
    Args:
        current_state: Current application state with required components
    """
-    adobe_help_node, _, _ = create_tavily_node(
-        llm=app_state.llm, name=ADOBEHELP
-    )
 
-
-
-
-    )
+    adobe_help_node = create_tavily_node(name=ADOBEHELP, config=config)
+
+    rag_node = create_transcript_rag_chain(datastore, config=config)
 
    supervisor_agent = create_team_supervisor(
-        current_state.llm,
        SUPERVISOR_SYSTEM,
-        [VIDEOARCHIVE, ADOBEHELP],
+        [VIDEOARCHIVE, ADOBEHELP], config=config
    )
 
-    ai_graph =
+    ai_graph = StateGraph(PsTutsTeamState, config_schema=Configuration)
 
    ai_graph.add_node(VIDEOARCHIVE, rag_node)
    ai_graph.add_node(ADOBEHELP, adobe_help_node)
    ai_graph.add_node("supervisor", supervisor_agent)
 
-
-
-        [ADOBEHELP, "supervisor"],
-    ]
-
-    [ai_graph.add_edge(*p) for p in edges]
+    ai_graph.add_edge(VIDEOARCHIVE, "supervisor")
+    ai_graph.add_edge(ADOBEHELP, "supervisor")
 
    ai_graph.add_conditional_edges(
        "supervisor",
@@ -273,7 +244,7 @@ async def build_the_graph(current_state: ApplicationState):
        {
            VIDEOARCHIVE: VIDEOARCHIVE,
            ADOBEHELP: ADOBEHELP,
-            "FINISH":
+            "FINISH": END,
        },
    )
 
@@ -283,4 +254,5 @@ async def build_the_graph(current_state: ApplicationState):
 
 
 # Note: Cannot run build_the_graph() here as it requires current_state parameter
-
+db = initialize_datastore(lambda _: logging.info("Database initialized"))
+graph = asyncio.run(build_the_graph(db))
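Taken together, the rewritten module now has a two-step entry point: initialize_datastore schedules vector-store population with asyncio.create_task (so it must be called from a running event loop), and build_the_graph wires the worker nodes back to the supervisor with "FINISH" routed to END. A minimal driver sketch, assuming build_the_graph returns a compiled, invokable graph and that VIDEOARCHIVE/ADOBEHELP are the string constants imported from app:

    import asyncio
    import logging

    from app import ADOBEHELP, VIDEOARCHIVE
    from pstuts_rag.graph import build_the_graph, initialize_datastore


    async def main() -> None:
        # asyncio.create_task inside initialize_datastore needs a live loop,
        # so both calls are deferred into this coroutine.
        datastore = initialize_datastore(lambda _: logging.info("Database initialized"))
        graph = await build_the_graph(datastore)

        # Assumed input shape: the PsTutsTeamState fields, with the
        # operator.add-merged message list starting empty and the
        # supervisor filling in `next`.
        result = await graph.ainvoke(
            {"messages": [], "team_members": [VIDEOARCHIVE, ADOBEHELP], "next": ""}
        )
        print(result)


    asyncio.run(main())

One caveat on the hunk above: the module-level db = initialize_datastore(...) runs at import time, outside any event loop, where asyncio.create_task raises RuntimeError; the sketch sidesteps that by calling it inside main().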
pstuts_rag/pstuts_rag/state.py
CHANGED
@@ -1,24 +1,80 @@
-from
+from pydantic import BaseModel, Field
 from langchain_core.messages import BaseMessage
-
+from typing import List, Optional, Tuple, Dict, Annotated
 
 import operator
-from typing import Annotated, List, Tuple, TypedDict
 
 
-class PsTutsTeamState(
-
-
-
+class PsTutsTeamState(BaseModel):
+    """State management for PsTuts team workflow orchestration."""
+
+    messages: Annotated[List[BaseMessage], operator.add] = Field(
+        default_factory=list,
+        description="Accumulated list of messages exchanged during team collaboration",
+    )
+    team_members: List[str] = Field(
+        default_factory=list,
+        description="List of active team member identifiers participating in the workflow",
+    )
+    next: str = Field(
+        default="",
+        description="Identifier of the next team member or process step to execute",
+    )
 
 
-@dataclass(kw_only=True)
-class StateInput:
-    input_query: str = field(default=None)  # Report topic
+class StateInput(BaseModel):
+    """Input state for processing user queries and requests."""
+
+    input_query: str = Field(
+        description="The user's input query or topic to be processed by the system",
+    )
+
+
+class StateOutput(BaseModel):
+    """Output state containing processed results and references."""
+
+    output_content: str = Field(
+        default="",
+        description="The final generated content or report based on the input query",
+    )
+    video_references: List[Tuple[str, float]] = Field(
+        default_factory=list,
+        description="List of video references with starting timestamp",
+    )
+    url_references: List[str] = Field(
+        default_factory=list,
+        description="List of URL references related to the generated content",
+    )
+
+
+class RAGInput(BaseModel):
+    """Input for Retrieval-Augmented Generation processing."""
+
+    query: str = Field(
+        description="The search query to be processed by the RAG system",
+    )
+
+
+class VideoSegment(BaseModel):
+    """Summarized video segment"""
 
+    path: str = Field(default="", description="Path to video file")
+    summary: str = Field(default="", description="Summary of the transcript")
+    start: Optional[float] = Field(description="Start time of the segment")
+    stop: Optional[float] = Field(description="End time of the segment")
 
-
-
-
-
-
+class RAGOutput(BaseModel):
+    """Output from Retrieval-Augmented Generation processing."""
 
+    query: str = Field(
+        default="", description="The original query that was processed"
+    )
+    content: str = Field(
+        default="",
+        description="The generated content based on retrieved documents and query",
+    )
+    context: List[Dict[str, VideoSegment]] = Field(
+        default_factory=list,
+        description="Retrieved document context with relevance scores and metadata",
+    )
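The move from a TypedDict/dataclass mix to Pydantic models makes the state validated and self-documenting. The Annotated[List[BaseMessage], operator.add] annotation is the part LangGraph reads: it treats operator.add as the reducer, so each node's returned messages are appended rather than overwritten. A short sketch of the resulting behavior (field values here are illustrative):

    import operator

    from langchain_core.messages import AIMessage, HumanMessage
    from pstuts_rag.state import PsTutsTeamState, RAGOutput, VideoSegment

    # Defaults come from Field(default=...) / Field(default_factory=...).
    state = PsTutsTeamState(team_members=["VIDEOARCHIVE", "ADOBEHELP"])
    assert state.messages == [] and state.next == ""

    # LangGraph applies the annotated reducer when merging node updates:
    update = [HumanMessage(content="What is a layer?"), AIMessage(content="A layer is ...")]
    assert len(operator.add(state.messages, update)) == 2

    # RAGOutput nests VideoSegment summaries keyed by (assumed) segment ids.
    # start/stop carry no default, so they must be supplied; the times below
    # come from the notebook output earlier in this commit.
    out = RAGOutput(
        query="What is a layer?",
        content="A layer is ...",
        context=[{"understand-layers": VideoSegment(summary="Intro to layers", start=0.47, stop=62.14)}],
    )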
pyproject.toml
CHANGED
@@ -12,7 +12,7 @@ dependencies = [
     "langchain-experimental>=0.3.4",
     "langchain-openai",
     "langchain-qdrant>=0.2.0",
-    "langgraph>=0.
+    "langgraph>=0.2.55",
     "qdrant-client>=1.8.0",
     # API integration
     "chainlit==2.0.4",