tregu0458 commited on
Commit
e385e29
·
verified ·
1 Parent(s): 3d0fd01

Delete pdf_2_dify_workflow.ipynb

Browse files
Files changed (1) hide show
  1. pdf_2_dify_workflow.ipynb +0 -765
pdf_2_dify_workflow.ipynb DELETED
@@ -1,765 +0,0 @@
1
- {
2
- "nbformat": 4,
3
- "nbformat_minor": 0,
4
- "metadata": {
5
- "colab": {
6
- "provenance": []
7
- },
8
- "kernelspec": {
9
- "name": "python3",
10
- "display_name": "Python 3"
11
- },
12
- "language_info": {
13
- "name": "python"
14
- }
15
- },
16
- "cells": [
17
- {
18
- "cell_type": "code",
19
- "execution_count": 1,
20
- "metadata": {
21
- "colab": {
22
- "base_uri": "https://localhost:8080/",
23
- "height": 1000
24
- },
25
- "id": "2RCxpRzpqPrB",
26
- "outputId": "2b96ddb3-cc75-4930-a2d0-53c0d7a52ae2"
27
- },
28
- "outputs": [
29
- {
30
- "output_type": "stream",
31
- "name": "stdout",
32
- "text": [
33
- "Collecting gradio\n",
34
- " Downloading gradio-4.31.5-py3-none-any.whl (12.3 MB)\n",
35
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.3/12.3 MB\u001b[0m \u001b[31m42.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
36
- "\u001b[?25hCollecting unstructured[pdf]\n",
37
- " Downloading unstructured-0.14.2-py3-none-any.whl (2.0 MB)\n",
38
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m55.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
39
- "\u001b[?25hCollecting langchain-community\n",
40
- " Downloading langchain_community-0.2.1-py3-none-any.whl (2.1 MB)\n",
41
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m64.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
42
- "\u001b[?25hCollecting aiofiles<24.0,>=22.0 (from gradio)\n",
43
- " Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)\n",
44
- "Requirement already satisfied: altair<6.0,>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (4.2.2)\n",
45
- "Collecting fastapi (from gradio)\n",
46
- " Downloading fastapi-0.111.0-py3-none-any.whl (91 kB)\n",
47
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.0/92.0 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
48
- "\u001b[?25hCollecting ffmpy (from gradio)\n",
49
- " Downloading ffmpy-0.3.2.tar.gz (5.5 kB)\n",
50
- " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
51
- "Collecting gradio-client==0.16.4 (from gradio)\n",
52
- " Downloading gradio_client-0.16.4-py3-none-any.whl (315 kB)\n",
53
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m315.9/315.9 kB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
54
- "\u001b[?25hCollecting httpx>=0.24.1 (from gradio)\n",
55
- " Downloading httpx-0.27.0-py3-none-any.whl (75 kB)\n",
56
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
57
- "\u001b[?25hRequirement already satisfied: huggingface-hub>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from gradio) (0.23.1)\n",
58
- "Requirement already satisfied: importlib-resources<7.0,>=1.3 in /usr/local/lib/python3.10/dist-packages (from gradio) (6.4.0)\n",
59
- "Requirement already satisfied: jinja2<4.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.1.4)\n",
60
- "Requirement already satisfied: markupsafe~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.1.5)\n",
61
- "Requirement already satisfied: matplotlib~=3.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.7.1)\n",
62
- "Requirement already satisfied: numpy~=1.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (1.25.2)\n",
63
- "Collecting orjson~=3.0 (from gradio)\n",
64
- " Downloading orjson-3.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (142 kB)\n",
65
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m142.5/142.5 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
66
- "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from gradio) (24.0)\n",
67
- "Requirement already satisfied: pandas<3.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.0.3)\n",
68
- "Requirement already satisfied: pillow<11.0,>=8.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (9.4.0)\n",
69
- "Requirement already satisfied: pydantic>=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.7.1)\n",
70
- "Collecting pydub (from gradio)\n",
71
- " Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n",
72
- "Collecting python-multipart>=0.0.9 (from gradio)\n",
73
- " Downloading python_multipart-0.0.9-py3-none-any.whl (22 kB)\n",
74
- "Requirement already satisfied: pyyaml<7.0,>=5.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (6.0.1)\n",
75
- "Collecting ruff>=0.2.2 (from gradio)\n",
76
- " Downloading ruff-0.4.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (8.8 MB)\n",
77
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.8/8.8 MB\u001b[0m \u001b[31m42.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
78
- "\u001b[?25hCollecting semantic-version~=2.0 (from gradio)\n",
79
- " Downloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\n",
80
- "Collecting tomlkit==0.12.0 (from gradio)\n",
81
- " Downloading tomlkit-0.12.0-py3-none-any.whl (37 kB)\n",
82
- "Collecting typer<1.0,>=0.12 (from gradio)\n",
83
- " Downloading typer-0.12.3-py3-none-any.whl (47 kB)\n",
84
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m47.2/47.2 kB\u001b[0m \u001b[31m1.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
85
- "\u001b[?25hRequirement already satisfied: typing-extensions~=4.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (4.11.0)\n",
86
- "Requirement already satisfied: urllib3~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.0.7)\n",
87
- "Collecting uvicorn>=0.14.0 (from gradio)\n",
88
- " Downloading uvicorn-0.30.0-py3-none-any.whl (62 kB)\n",
89
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.4/62.4 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
90
- "\u001b[?25hRequirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from gradio-client==0.16.4->gradio) (2023.6.0)\n",
91
- "Collecting websockets<12.0,>=10.0 (from gradio-client==0.16.4->gradio)\n",
92
- " Downloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (129 kB)\n",
93
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m129.9/129.9 kB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
94
- "\u001b[?25hRequirement already satisfied: chardet in /usr/local/lib/python3.10/dist-packages (from unstructured[pdf]) (5.2.0)\n",
95
- "Collecting filetype (from unstructured[pdf])\n",
96
- " Downloading filetype-1.2.0-py2.py3-none-any.whl (19 kB)\n",
97
- "Collecting python-magic (from unstructured[pdf])\n",
98
- " Downloading python_magic-0.4.27-py2.py3-none-any.whl (13 kB)\n",
99
- "Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from unstructured[pdf]) (4.9.4)\n",
100
- "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from unstructured[pdf]) (3.8.1)\n",
101
- "Requirement already satisfied: tabulate in /usr/local/lib/python3.10/dist-packages (from unstructured[pdf]) (0.9.0)\n",
102
- "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from unstructured[pdf]) (2.31.0)\n",
103
- "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from unstructured[pdf]) (4.12.3)\n",
104
- "Collecting emoji (from unstructured[pdf])\n",
105
- " Downloading emoji-2.12.1-py3-none-any.whl (431 kB)\n",
106
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m431.4/431.4 kB\u001b[0m \u001b[31m37.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
107
- "\u001b[?25hCollecting dataclasses-json (from unstructured[pdf])\n",
108
- " Downloading dataclasses_json-0.6.6-py3-none-any.whl (28 kB)\n",
109
- "Collecting python-iso639 (from unstructured[pdf])\n",
110
- " Downloading python_iso639-2024.4.27-py3-none-any.whl (274 kB)\n",
111
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m274.7/274.7 kB\u001b[0m \u001b[31m20.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
112
- "\u001b[?25hCollecting langdetect (from unstructured[pdf])\n",
113
- " Downloading langdetect-1.0.9.tar.gz (981 kB)\n",
114
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m981.5/981.5 kB\u001b[0m \u001b[31m45.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
115
- "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
116
- "Collecting rapidfuzz (from unstructured[pdf])\n",
117
- " Downloading rapidfuzz-3.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.4 MB)\n",
118
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m18.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
119
- "\u001b[?25hCollecting backoff (from unstructured[pdf])\n",
120
- " Downloading backoff-2.2.1-py3-none-any.whl (15 kB)\n",
121
- "Collecting unstructured-client (from unstructured[pdf])\n",
122
- " Downloading unstructured_client-0.22.0-py3-none-any.whl (28 kB)\n",
123
- "Requirement already satisfied: wrapt in /usr/local/lib/python3.10/dist-packages (from unstructured[pdf]) (1.14.1)\n",
124
- "Collecting onnx (from unstructured[pdf])\n",
125
- " Downloading onnx-1.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.9 MB)\n",
126
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.9/15.9 MB\u001b[0m \u001b[31m51.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
127
- "\u001b[?25hCollecting pdf2image (from unstructured[pdf])\n",
128
- " Downloading pdf2image-1.17.0-py3-none-any.whl (11 kB)\n",
129
- "Collecting pdfminer.six (from unstructured[pdf])\n",
130
- " Downloading pdfminer.six-20231228-py3-none-any.whl (5.6 MB)\n",
131
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m50.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
132
- "\u001b[?25hCollecting pikepdf (from unstructured[pdf])\n",
133
- " Downloading pikepdf-8.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.4 MB)\n",
134
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m59.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
135
- "\u001b[?25hCollecting pillow-heif (from unstructured[pdf])\n",
136
- " Downloading pillow_heif-0.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.5 MB)\n",
137
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.5/7.5 MB\u001b[0m \u001b[31m54.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
138
- "\u001b[?25hCollecting pypdf (from unstructured[pdf])\n",
139
- " Downloading pypdf-4.2.0-py3-none-any.whl (290 kB)\n",
140
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m290.4/290.4 kB\u001b[0m \u001b[31m36.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
141
- "\u001b[?25hCollecting pytesseract (from unstructured[pdf])\n",
142
- " Downloading pytesseract-0.3.10-py3-none-any.whl (14 kB)\n",
143
- "Collecting google-cloud-vision (from unstructured[pdf])\n",
144
- " Downloading google_cloud_vision-3.7.2-py2.py3-none-any.whl (459 kB)\n",
145
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m459.6/459.6 kB\u001b[0m \u001b[31m44.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
146
- "\u001b[?25hCollecting effdet (from unstructured[pdf])\n",
147
- " Downloading effdet-0.4.1-py3-none-any.whl (112 kB)\n",
148
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m112.5/112.5 kB\u001b[0m \u001b[31m13.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
149
- "\u001b[?25hCollecting unstructured-inference==0.7.33 (from unstructured[pdf])\n",
150
- " Downloading unstructured_inference-0.7.33-py3-none-any.whl (56 kB)\n",
151
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.2/56.2 kB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
152
- "\u001b[?25hCollecting unstructured.pytesseract>=0.3.12 (from unstructured[pdf])\n",
153
- " Downloading unstructured.pytesseract-0.3.12-py3-none-any.whl (14 kB)\n",
154
- "Collecting layoutparser (from unstructured-inference==0.7.33->unstructured[pdf])\n",
155
- " Downloading layoutparser-0.3.4-py3-none-any.whl (19.2 MB)\n",
156
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m19.2/19.2 MB\u001b[0m \u001b[31m64.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
157
- "\u001b[?25hRequirement already satisfied: opencv-python!=4.7.0.68 in /usr/local/lib/python3.10/dist-packages (from unstructured-inference==0.7.33->unstructured[pdf]) (4.8.0.76)\n",
158
- "Collecting onnxruntime>=1.17.0 (from unstructured-inference==0.7.33->unstructured[pdf])\n",
159
- " Downloading onnxruntime-1.18.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (6.8 MB)\n",
160
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m102.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
161
- "\u001b[?25hRequirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from unstructured-inference==0.7.33->unstructured[pdf]) (2.3.0+cu121)\n",
162
- "Collecting timm (from unstructured-inference==0.7.33->unstructured[pdf])\n",
163
- " Downloading timm-1.0.3-py3-none-any.whl (2.3 MB)\n",
164
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m76.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
165
- "\u001b[?25hRequirement already satisfied: transformers>=4.25.1 in /usr/local/lib/python3.10/dist-packages (from unstructured-inference==0.7.33->unstructured[pdf]) (4.41.1)\n",
166
- "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (2.0.30)\n",
167
- "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (3.9.5)\n",
168
- "Collecting langchain<0.3.0,>=0.2.0 (from langchain-community)\n",
169
- " Downloading langchain-0.2.1-py3-none-any.whl (973 kB)\n",
170
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m973.5/973.5 kB\u001b[0m \u001b[31m61.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
171
- "\u001b[?25hCollecting langchain-core<0.3.0,>=0.2.0 (from langchain-community)\n",
172
- " Downloading langchain_core-0.2.1-py3-none-any.whl (308 kB)\n",
173
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m308.5/308.5 kB\u001b[0m \u001b[31m36.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
174
- "\u001b[?25hCollecting langsmith<0.2.0,>=0.1.0 (from langchain-community)\n",
175
- " Downloading langsmith-0.1.63-py3-none-any.whl (122 kB)\n",
176
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m122.8/122.8 kB\u001b[0m \u001b[31m18.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
177
- "\u001b[?25hRequirement already satisfied: tenacity<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (8.3.0)\n",
178
- "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.3.1)\n",
179
- "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (23.2.0)\n",
180
- "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.4.1)\n",
181
- "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (6.0.5)\n",
182
- "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.9.4)\n",
183
- "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (4.0.3)\n",
184
- "Requirement already satisfied: entrypoints in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio) (0.4)\n",
185
- "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio) (4.19.2)\n",
186
- "Requirement already satisfied: toolz in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio) (0.12.1)\n",
187
- "Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json->unstructured[pdf])\n",
188
- " Downloading marshmallow-3.21.2-py3-none-any.whl (49 kB)\n",
189
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.3/49.3 kB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
190
- "\u001b[?25hCollecting typing-inspect<1,>=0.4.0 (from dataclasses-json->unstructured[pdf])\n",
191
- " Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n",
192
- "Requirement already satisfied: anyio in /usr/local/lib/python3.10/dist-packages (from httpx>=0.24.1->gradio) (3.7.1)\n",
193
- "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx>=0.24.1->gradio) (2024.2.2)\n",
194
- "Collecting httpcore==1.* (from httpx>=0.24.1->gradio)\n",
195
- " Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)\n",
196
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m11.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
197
- "\u001b[?25hRequirement already satisfied: idna in /usr/local/lib/python3.10/dist-packages (from httpx>=0.24.1->gradio) (3.7)\n",
198
- "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx>=0.24.1->gradio) (1.3.1)\n",
199
- "Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx>=0.24.1->gradio)\n",
200
- " Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n",
201
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
202
- "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.19.3->gradio) (3.14.0)\n",
203
- "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.19.3->gradio) (4.66.4)\n",
204
- "Collecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain<0.3.0,>=0.2.0->langchain-community)\n",
205
- " Downloading langchain_text_splitters-0.2.0-py3-none-any.whl (23 kB)\n",
206
- "Collecting jsonpatch<2.0,>=1.33 (from langchain-core<0.3.0,>=0.2.0->langchain-community)\n",
207
- " Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)\n",
208
- "Collecting packaging (from gradio)\n",
209
- " Downloading packaging-23.2-py3-none-any.whl (53 kB)\n",
210
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.0/53.0 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
211
- "\u001b[?25hRequirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (1.2.1)\n",
212
- "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (0.12.1)\n",
213
- "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (4.51.0)\n",
214
- "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (1.4.5)\n",
215
- "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (3.1.2)\n",
216
- "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (2.8.2)\n",
217
- "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas<3.0,>=1.0->gradio) (2023.4)\n",
218
- "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas<3.0,>=1.0->gradio) (2024.1)\n",
219
- "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=2.0->gradio) (0.7.0)\n",
220
- "Requirement already satisfied: pydantic-core==2.18.2 in /usr/local/lib/python3.10/dist-packages (from pydantic>=2.0->gradio) (2.18.2)\n",
221
- "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->unstructured[pdf]) (3.3.2)\n",
222
- "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain-community) (3.0.3)\n",
223
- "Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0,>=0.12->gradio) (8.1.7)\n",
224
- "Collecting shellingham>=1.3.0 (from typer<1.0,>=0.12->gradio)\n",
225
- " Downloading shellingham-1.5.4-py2.py3-none-any.whl (9.8 kB)\n",
226
- "Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0,>=0.12->gradio) (13.7.1)\n",
227
- "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->unstructured[pdf]) (2.5)\n",
228
- "Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (from effdet->unstructured[pdf]) (0.18.0+cu121)\n",
229
- "Requirement already satisfied: pycocotools>=2.0.2 in /usr/local/lib/python3.10/dist-packages (from effdet->unstructured[pdf]) (2.0.7)\n",
230
- "Collecting omegaconf>=2.0 (from effdet->unstructured[pdf])\n",
231
- " Downloading omegaconf-2.3.0-py3-none-any.whl (79 kB)\n",
232
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.5/79.5 kB\u001b[0m \u001b[31m11.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
233
- "\u001b[?25hCollecting starlette<0.38.0,>=0.37.2 (from fastapi->gradio)\n",
234
- " Downloading starlette-0.37.2-py3-none-any.whl (71 kB)\n",
235
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.9/71.9 kB\u001b[0m \u001b[31m11.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
236
- "\u001b[?25hCollecting fastapi-cli>=0.0.2 (from fastapi->gradio)\n",
237
- " Downloading fastapi_cli-0.0.4-py3-none-any.whl (9.5 kB)\n",
238
- "Collecting ujson!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,>=4.0.1 (from fastapi->gradio)\n",
239
- " Downloading ujson-5.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (53 kB)\n",
240
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.6/53.6 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
241
- "\u001b[?25hCollecting email_validator>=2.0.0 (from fastapi->gradio)\n",
242
- " Downloading email_validator-2.1.1-py3-none-any.whl (30 kB)\n",
243
- "Requirement already satisfied: google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.1 in /usr/local/lib/python3.10/dist-packages (from google-cloud-vision->unstructured[pdf]) (2.11.1)\n",
244
- "Requirement already satisfied: google-auth!=2.24.0,!=2.25.0,<3.0.0dev,>=2.14.1 in /usr/local/lib/python3.10/dist-packages (from google-cloud-vision->unstructured[pdf]) (2.27.0)\n",
245
- "Requirement already satisfied: proto-plus<2.0.0dev,>=1.22.3 in /usr/local/lib/python3.10/dist-packages (from google-cloud-vision->unstructured[pdf]) (1.23.0)\n",
246
- "Requirement already satisfied: protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5 in /usr/local/lib/python3.10/dist-packages (from google-cloud-vision->unstructured[pdf]) (3.20.3)\n",
247
- "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from langdetect->unstructured[pdf]) (1.16.0)\n",
248
- "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk->unstructured[pdf]) (1.4.2)\n",
249
- "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk->unstructured[pdf]) (2024.5.15)\n",
250
- "Requirement already satisfied: cryptography>=36.0.0 in /usr/local/lib/python3.10/dist-packages (from pdfminer.six->unstructured[pdf]) (42.0.7)\n",
251
- "Collecting pillow<11.0,>=8.0 (from gradio)\n",
252
- " Downloading pillow-10.3.0-cp310-cp310-manylinux_2_28_x86_64.whl (4.5 MB)\n",
253
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m97.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
254
- "\u001b[?25hCollecting Deprecated (from pikepdf->unstructured[pdf])\n",
255
- " Downloading Deprecated-1.2.14-py2.py3-none-any.whl (9.6 kB)\n",
256
- "Collecting deepdiff>=6.0 (from unstructured-client->unstructured[pdf])\n",
257
- " Downloading deepdiff-7.0.1-py3-none-any.whl (80 kB)\n",
258
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m80.8/80.8 kB\u001b[0m \u001b[31m12.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
259
- "\u001b[?25hCollecting jsonpath-python>=1.0.6 (from unstructured-client->unstructured[pdf])\n",
260
- " Downloading jsonpath_python-1.0.6-py3-none-any.whl (7.6 kB)\n",
261
- "Collecting mypy-extensions>=1.0.0 (from unstructured-client->unstructured[pdf])\n",
262
- " Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n",
263
- "Requirement already satisfied: cffi>=1.12 in /usr/local/lib/python3.10/dist-packages (from cryptography>=36.0.0->pdfminer.six->unstructured[pdf]) (1.16.0)\n",
264
- "Collecting ordered-set<4.2.0,>=4.1.0 (from deepdiff>=6.0->unstructured-client->unstructured[pdf])\n",
265
- " Downloading ordered_set-4.1.0-py3-none-any.whl (7.6 kB)\n",
266
- "Collecting dnspython>=2.0.0 (from email_validator>=2.0.0->fastapi->gradio)\n",
267
- " Downloading dnspython-2.6.1-py3-none-any.whl (307 kB)\n",
268
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.7/307.7 kB\u001b[0m \u001b[31m34.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
269
- "\u001b[?25hRequirement already satisfied: googleapis-common-protos<2.0.dev0,>=1.56.2 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.1->google-cloud-vision->unstructured[pdf]) (1.63.0)\n",
270
- "Requirement already satisfied: grpcio<2.0dev,>=1.33.2 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.1->google-cloud-vision->unstructured[pdf]) (1.64.0)\n",
271
- "Requirement already satisfied: grpcio-status<2.0.dev0,>=1.33.2 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.1->google-cloud-vision->unstructured[pdf]) (1.48.2)\n",
272
- "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth!=2.24.0,!=2.25.0,<3.0.0dev,>=2.14.1->google-cloud-vision->unstructured[pdf]) (5.3.3)\n",
273
- "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth!=2.24.0,!=2.25.0,<3.0.0dev,>=2.14.1->google-cloud-vision->unstructured[pdf]) (0.4.0)\n",
274
- "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth!=2.24.0,!=2.25.0,<3.0.0dev,>=2.14.1->google-cloud-vision->unstructured[pdf]) (4.9)\n",
275
- "Collecting jsonpointer>=1.9 (from jsonpatch<2.0,>=1.33->langchain-core<0.3.0,>=0.2.0->langchain-community)\n",
276
- " Downloading jsonpointer-2.4-py2.py3-none-any.whl (7.8 kB)\n",
277
- "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (2023.12.1)\n",
278
- "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (0.35.1)\n",
279
- "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (0.18.1)\n",
280
- "Collecting antlr4-python3-runtime==4.9.* (from omegaconf>=2.0->effdet->unstructured[pdf])\n",
281
- " Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)\n",
282
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.0/117.0 kB\u001b[0m \u001b[31m17.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
283
- "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
284
- "Collecting coloredlogs (from onnxruntime>=1.17.0->unstructured-inference==0.7.33->unstructured[pdf])\n",
285
- " Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)\n",
286
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
287
- "\u001b[?25hRequirement already satisfied: flatbuffers in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.17.0->unstructured-inference==0.7.33->unstructured[pdf]) (24.3.25)\n",
288
- "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.17.0->unstructured-inference==0.7.33->unstructured[pdf]) (1.12)\n",
289
- "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (3.0.0)\n",
290
- "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (2.16.1)\n",
291
- "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio->httpx>=0.24.1->gradio) (1.2.1)\n",
292
- "Requirement already satisfied: safetensors in /usr/local/lib/python3.10/dist-packages (from timm->unstructured-inference==0.7.33->unstructured[pdf]) (0.4.3)\n",
293
- "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->unstructured-inference==0.7.33->unstructured[pdf]) (3.3)\n",
294
- "Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->unstructured-inference==0.7.33->unstructured[pdf])\n",
295
- " Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n",
296
- "Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->unstructured-inference==0.7.33->unstructured[pdf])\n",
297
- " Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n",
298
- "Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->unstructured-inference==0.7.33->unstructured[pdf])\n",
299
- " Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n",
300
- "Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch->unstructured-inference==0.7.33->unstructured[pdf])\n",
301
- " Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n",
302
- "Collecting nvidia-cublas-cu12==12.1.3.1 (from torch->unstructured-inference==0.7.33->unstructured[pdf])\n",
303
- " Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n",
304
- "Collecting nvidia-cufft-cu12==11.0.2.54 (from torch->unstructured-inference==0.7.33->unstructured[pdf])\n",
305
- " Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n",
306
- "Collecting nvidia-curand-cu12==10.3.2.106 (from torch->unstructured-inference==0.7.33->unstructured[pdf])\n",
307
- " Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n",
308
- "Collecting nvidia-cusolver-cu12==11.4.5.107 (from torch->unstructured-inference==0.7.33->unstructured[pdf])\n",
309
- " Using cached nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n",
310
- "Collecting nvidia-cusparse-cu12==12.1.0.106 (from torch->unstructured-inference==0.7.33->unstructured[pdf])\n",
311
- " Using cached nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n",
312
- "Collecting nvidia-nccl-cu12==2.20.5 (from torch->unstructured-inference==0.7.33->unstructured[pdf])\n",
313
- " Using cached nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)\n",
314
- "Collecting nvidia-nvtx-cu12==12.1.105 (from torch->unstructured-inference==0.7.33->unstructured[pdf])\n",
315
- " Using cached nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n",
316
- "Requirement already satisfied: triton==2.3.0 in /usr/local/lib/python3.10/dist-packages (from torch->unstructured-inference==0.7.33->unstructured[pdf]) (2.3.0)\n",
317
- "Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch->unstructured-inference==0.7.33->unstructured[pdf])\n",
318
- " Downloading nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_x86_64.whl (21.3 MB)\n",
319
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.3/21.3 MB\u001b[0m \u001b[31m63.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
320
- "\u001b[?25hRequirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.25.1->unstructured-inference==0.7.33->unstructured[pdf]) (0.19.1)\n",
321
- "Collecting httptools>=0.5.0 (from uvicorn>=0.14.0->gradio)\n",
322
- " Downloading httptools-0.6.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (341 kB)\n",
323
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.4/341.4 kB\u001b[0m \u001b[31m34.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
324
- "\u001b[?25hCollecting python-dotenv>=0.13 (from uvicorn>=0.14.0->gradio)\n",
325
- " Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)\n",
326
- "Collecting uvloop!=0.15.0,!=0.15.1,>=0.14.0 (from uvicorn>=0.14.0->gradio)\n",
327
- " Downloading uvloop-0.19.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.4 MB)\n",
328
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m94.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
329
- "\u001b[?25hCollecting watchfiles>=0.13 (from uvicorn>=0.14.0->gradio)\n",
330
- " Downloading watchfiles-0.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n",
331
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m75.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
332
- "\u001b[?25hRequirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from layoutparser->unstructured-inference==0.7.33->unstructured[pdf]) (1.11.4)\n",
333
- "Collecting iopath (from layoutparser->unstructured-inference==0.7.33->unstructured[pdf])\n",
334
- " Downloading iopath-0.1.10.tar.gz (42 kB)\n",
335
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.2/42.2 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
336
- "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
337
- "Collecting pdfplumber (from layoutparser->unstructured-inference==0.7.33->unstructured[pdf])\n",
338
- " Downloading pdfplumber-0.11.0-py3-none-any.whl (56 kB)\n",
339
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.4/56.4 kB\u001b[0m \u001b[31m9.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
340
- "\u001b[?25hRequirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.12->cryptography>=36.0.0->pdfminer.six->unstructured[pdf]) (2.22)\n",
341
- "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.12->gradio) (0.1.2)\n",
342
- "Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth!=2.24.0,!=2.25.0,<3.0.0dev,>=2.14.1->google-cloud-vision->unstructured[pdf]) (0.6.0)\n",
343
- "Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime>=1.17.0->unstructured-inference==0.7.33->unstructured[pdf])\n",
344
- " Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)\n",
345
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m13.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
346
- "\u001b[?25hCollecting portalocker (from iopath->layoutparser->unstructured-inference==0.7.33->unstructured[pdf])\n",
347
- " Downloading portalocker-2.8.2-py3-none-any.whl (17 kB)\n",
348
- "Collecting pypdfium2>=4.18.0 (from pdfplumber->layoutparser->unstructured-inference==0.7.33->unstructured[pdf])\n",
349
- " Downloading pypdfium2-4.30.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.8 MB)\n",
350
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.8/2.8 MB\u001b[0m \u001b[31m89.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
351
- "\u001b[?25hRequirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->onnxruntime>=1.17.0->unstructured-inference==0.7.33->unstructured[pdf]) (1.3.0)\n",
352
- "Building wheels for collected packages: ffmpy, langdetect, antlr4-python3-runtime, iopath\n",
353
- " Building wheel for ffmpy (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
354
- " Created wheel for ffmpy: filename=ffmpy-0.3.2-py3-none-any.whl size=5584 sha256=c6158ddca0a5ac6ddb324af130fe12151e87612da3f6b323233a0369341383b2\n",
355
- " Stored in directory: /root/.cache/pip/wheels/bd/65/9a/671fc6dcde07d4418df0c592f8df512b26d7a0029c2a23dd81\n",
356
- " Building wheel for langdetect (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
357
- " Created wheel for langdetect: filename=langdetect-1.0.9-py3-none-any.whl size=993227 sha256=e52141bad163b0cafaa99e943d79a52ad55c5c2e8c163dfc7521756958cf7e3a\n",
358
- " Stored in directory: /root/.cache/pip/wheels/95/03/7d/59ea870c70ce4e5a370638b5462a7711ab78fba2f655d05106\n",
359
- " Building wheel for antlr4-python3-runtime (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
360
- " Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.9.3-py3-none-any.whl size=144554 sha256=b4495c0ee1540a8f6f26eda0e070835c650ab1274ebf4fab6f35a92ba8398511\n",
361
- " Stored in directory: /root/.cache/pip/wheels/12/93/dd/1f6a127edc45659556564c5730f6d4e300888f4bca2d4c5a88\n",
362
- " Building wheel for iopath (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
363
- " Created wheel for iopath: filename=iopath-0.1.10-py3-none-any.whl size=31532 sha256=a8d364a88b1826ef839088631b9cae5695642d9aef9d532080090552f9aede68\n",
364
- " Stored in directory: /root/.cache/pip/wheels/9a/a3/b6/ac0fcd1b4ed5cfeb3db92e6a0e476cfd48ed0df92b91080c1d\n",
365
- "Successfully built ffmpy langdetect antlr4-python3-runtime iopath\n",
366
- "Installing collected packages: pydub, filetype, ffmpy, antlr4-python3-runtime, websockets, uvloop, ujson, tomlkit, shellingham, semantic-version, ruff, rapidfuzz, python-multipart, python-magic, python-iso639, python-dotenv, pypdfium2, pypdf, portalocker, pillow, packaging, orjson, ordered-set, onnx, omegaconf, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, mypy-extensions, langdetect, jsonpointer, jsonpath-python, humanfriendly, httptools, h11, emoji, dnspython, Deprecated, backoff, aiofiles, watchfiles, uvicorn, unstructured.pytesseract, typing-inspect, starlette, pytesseract, pillow-heif, pikepdf, pdf2image, nvidia-cusparse-cu12, nvidia-cudnn-cu12, marshmallow, jsonpatch, iopath, httpcore, email_validator, deepdiff, coloredlogs, typer, pdfminer.six, onnxruntime, nvidia-cusolver-cu12, langsmith, httpx, dataclasses-json, unstructured-client, pdfplumber, langchain-core, gradio-client, fastapi-cli, unstructured, layoutparser, langchain-text-splitters, google-cloud-vision, fastapi, timm, langchain, gradio, unstructured-inference, langchain-community, effdet\n",
367
- " Attempting uninstall: pillow\n",
368
- " Found existing installation: Pillow 9.4.0\n",
369
- " Uninstalling Pillow-9.4.0:\n",
370
- " Successfully uninstalled Pillow-9.4.0\n",
371
- " Attempting uninstall: packaging\n",
372
- " Found existing installation: packaging 24.0\n",
373
- " Uninstalling packaging-24.0:\n",
374
- " Successfully uninstalled packaging-24.0\n",
375
- " Attempting uninstall: typer\n",
376
- " Found existing installation: typer 0.9.4\n",
377
- " Uninstalling typer-0.9.4:\n",
378
- " Successfully uninstalled typer-0.9.4\n",
379
- "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
380
- "imageio 2.31.6 requires pillow<10.1.0,>=8.3.2, but you have pillow 10.3.0 which is incompatible.\n",
381
- "spacy 3.7.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\n",
382
- "weasel 0.3.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\u001b[0m\u001b[31m\n",
383
- "\u001b[0mSuccessfully installed Deprecated-1.2.14 aiofiles-23.2.1 antlr4-python3-runtime-4.9.3 backoff-2.2.1 coloredlogs-15.0.1 dataclasses-json-0.6.6 deepdiff-7.0.1 dnspython-2.6.1 effdet-0.4.1 email_validator-2.1.1 emoji-2.12.1 fastapi-0.111.0 fastapi-cli-0.0.4 ffmpy-0.3.2 filetype-1.2.0 google-cloud-vision-3.7.2 gradio-4.31.5 gradio-client-0.16.4 h11-0.14.0 httpcore-1.0.5 httptools-0.6.1 httpx-0.27.0 humanfriendly-10.0 iopath-0.1.10 jsonpatch-1.33 jsonpath-python-1.0.6 jsonpointer-2.4 langchain-0.2.1 langchain-community-0.2.1 langchain-core-0.2.1 langchain-text-splitters-0.2.0 langdetect-1.0.9 langsmith-0.1.63 layoutparser-0.3.4 marshmallow-3.21.2 mypy-extensions-1.0.0 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.5.40 nvidia-nvtx-cu12-12.1.105 omegaconf-2.3.0 onnx-1.16.1 onnxruntime-1.18.0 ordered-set-4.1.0 orjson-3.10.3 packaging-23.2 pdf2image-1.17.0 pdfminer.six-20231228 pdfplumber-0.11.0 pikepdf-8.15.1 pillow-10.3.0 pillow-heif-0.16.0 portalocker-2.8.2 pydub-0.25.1 pypdf-4.2.0 pypdfium2-4.30.0 pytesseract-0.3.10 python-dotenv-1.0.1 python-iso639-2024.4.27 python-magic-0.4.27 python-multipart-0.0.9 rapidfuzz-3.9.1 ruff-0.4.6 semantic-version-2.10.0 shellingham-1.5.4 starlette-0.37.2 timm-1.0.3 tomlkit-0.12.0 typer-0.12.3 typing-inspect-0.9.0 ujson-5.10.0 unstructured-0.14.2 unstructured-client-0.22.0 unstructured-inference-0.7.33 unstructured.pytesseract-0.3.12 uvicorn-0.30.0 uvloop-0.19.0 watchfiles-0.22.0 websockets-11.0.3\n"
384
- ]
385
- },
386
- {
387
- "output_type": "display_data",
388
- "data": {
389
- "application/vnd.colab-display-data+json": {
390
- "pip_warning": {
391
- "packages": [
392
- "PIL",
393
- "google",
394
- "pydevd_plugins"
395
- ]
396
- },
397
- "id": "b31a06ac32884159a2d3a4b477ae70e7"
398
- }
399
- },
400
- "metadata": {}
401
- }
402
- ],
403
- "source": [
404
- "!pip install gradio unstructured[pdf] langchain-community"
405
- ]
406
- },
407
- {
408
- "cell_type": "code",
409
- "source": [
410
- "import getpass\n",
411
- "\n",
412
- "DIFY_BASE_URL = getpass.getpass(\"DIFY_BASE_URL:\")\n",
413
- "DIFY_API_KEY_MYWORKFLOW = getpass.getpass(\"DIFY_API_KEY_MYWORKFLOW: \")\n",
414
- "\n"
415
- ],
416
- "metadata": {
417
- "colab": {
418
- "base_uri": "https://localhost:8080/"
419
- },
420
- "id": "iDbRDVzHqZh8",
421
- "outputId": "dd52051f-fa5a-4443-ff10-a0cac08f7f10"
422
- },
423
- "execution_count": 4,
424
- "outputs": [
425
- {
426
- "name": "stdout",
427
- "output_type": "stream",
428
- "text": [
429
- "DIFY_BASE_URL:··········\n",
430
- "DIFY_API_KEY_MYWORKFLOW: ··········\n"
431
- ]
432
- }
433
- ]
434
- },
435
- {
436
- "cell_type": "code",
437
- "source": [
438
- "import requests\n",
439
- "import json\n",
440
- "url = DIFY_BASE_URL + \"/workflows/run\"\n",
441
- "\n",
442
- "headers = {\n",
443
- " \"Content-Type\": \"application/json\",\n",
444
- " \"Authorization\": f\"Bearer {DIFY_API_KEY_MYWORKFLOW}\"\n",
445
- "}\n",
446
- "\n",
447
- "data = {\n",
448
- " \"inputs\": {\n",
449
- " \"url\":\"\",\n",
450
- " \"knowledge\":\"\"\"\n",
451
- " 本作の悪役。千年以上前に生まれた最初の鬼。鬼達の絶対的支配者で、自身の血を人間に与え大量の鬼を作り出した。炭治郎の家族を殺し、禰󠄀豆子を鬼に変えた仇である。\n",
452
- "鬼達を血に仕込んだ呪いで支配し、「あの方」と呼ばれ恐れられている。外見や攻撃は自由自在で、不死身の鬼を殺すことができる。性格は冷酷非情かつ支配的で、自らの意志に沿わない者は決して許さず、忠実に従っていた下弦の鬼達を些細なことで何ら躊躇なく惨殺したり、報告に来た猗窩座に理不尽な叱責を与えるなどしている。珠世からはその人物像を「いつも何かに怯えている臆病者」と皮肉られている。癇癪で暴力を振るったり、自分を棚に上げた言動をすることも多い。\n",
453
- "\n",
454
- " \"\"\",\n",
455
- " },\n",
456
- " \"query\": \"\", # クエリ(オプション)\n",
457
- " \"response_mode\": \"streaming\", # ストリーミング応答\n",
458
- " \"user\": \"abc_123\", # ユーザーID\n",
459
- "}\n",
460
- "\n",
461
- "response = requests.post(url, headers=headers, json=data, stream=True)\n",
462
- "\n",
463
- "response.raise_for_status() # エラーチェック\n",
464
- "\n",
465
- "response = requests.post(url, headers=headers, json=data, stream=True)\n",
466
- "response.raise_for_status()\n",
467
- "\n",
468
- "assistant_message = \"\"\n",
469
- "outputs = {}\n",
470
- "\n",
471
- "# APIレスポンスのチャンク処理\n",
472
- "for chunk in response.iter_lines(delimiter=b\"\\n\\n\"):\n",
473
- " if chunk:\n",
474
- " chunk_data = chunk.decode(\"utf-8\").strip()\n",
475
- " if chunk_data.startswith(\"data:\"):\n",
476
- " json_data = chunk_data[6:] # \"data: \"を取り除く\n",
477
- " if json_data:\n",
478
- " result = json.loads(json_data)\n",
479
- " if result.get(\"event\") == \"text_chunk\":\n",
480
- " answer = result.get(\"data\", \"\").get(\"text\", \"\")\n",
481
- " assistant_message += str(answer)\n",
482
- " print(str(answer), end=\"\", flush=True)\n",
483
- " elif result.get(\"event\") == \"workflow_finished\":\n",
484
- " outputs = result.get('data', \"\")\n",
485
- " print(assistant_message, outputs)"
486
- ],
487
- "metadata": {
488
- "colab": {
489
- "base_uri": "https://localhost:8080/"
490
- },
491
- "id": "_1LzsFX7rve1",
492
- "outputId": "05d4f072-b7e6-4359-b695-fc72b9cbcd77"
493
- },
494
- "execution_count": 6,
495
- "outputs": [
496
- {
497
- "output_type": "stream",
498
- "name": "stdout",
499
- "text": [
500
- "<explanation>\n",
501
- "## 文章の主要ポイントと解説\n",
502
- "\n",
503
- "この文章は、ある作品に登場する「悪役」のキャラクターについて説明しています。以下に主要なポイントを箇条書きでまとめ、それぞれのポイントを詳しく解説します。\n",
504
- "\n",
505
- "**1. 悪役の正体と能力**\n",
506
- "\n",
507
- "* **千年以上前に生まれた最初の鬼**: このキャラクターは、鬼という種族の始祖であり、非常に古い存在であることがわかります。\n",
508
- "* **鬼達の絶対的支配者**: 他の鬼たちを支配し、彼らを従わせる力を持っています。\n",
509
- "* **自身の血を人間に与え大量の鬼を作り出した**: このキャラクターは、自分の血を人間に与えることで、人間を鬼に変えることができるという力を持っています。\n",
510
- "* **炭治郎の家族を殺し、禰󠄀豆子を鬼に変えた仇である**: 主人公の家族を殺害し、妹を鬼に変えたという、物語における大きな敵役であることがわかります。\n",
511
- "* **不死身の鬼を殺すことができる**: 鬼は通常、不死身であると考えられていますが、このキャラクターはそれを殺せるほどの力を持っているようです。\n",
512
- "\n",
513
- "**2. 悪役の支配力と性格** \n",
514
- "\n",
515
- "* **鬼達を血に仕込んだ呪いで支配し、「あの方」と呼ばれ恐れられている**: このキャラクターは、鬼達を呪いによって支配しており、彼らから「あの方」と呼ばれ、深い恐怖を抱かれています。\n",
516
- "* **外見や攻撃は自由自在**: このキャラクターは自分の外見や攻撃方法を自由に変化させることができるようです。\n",
517
- "* **冷酷非情かつ支配的**: 自分の意志に従わない者は容赦なく殺し、支配欲が非常に強い人物であることがわかります。\n",
518
- "* **下弦の鬼達を惨殺したり、猗窩座に理不尽な叱責を与える**: 下位の鬼を簡単に殺したり、部下であるはずの鬼に対して理不尽な怒りをぶつけたりするなど、残酷で自己中心的であることがわかります。\n",
519
- "* **珠世からはその人物像を「いつも何かに怯えている臆病者」と皮肉られている**: このキャラクターは、強い力を持っていても、内心では何かに怯えているのではないかと推測されています。\n",
520
- "\n",
521
- "**3. 悪役の行動パターン**\n",
522
- "\n",
523
- "* **癇癪で暴力を振るったり、自分を棚に上げた言動をする**: 感情が激しく、自分の都合の良いように振る舞うことがあります。\n",
524
- "\n",
525
- "## 文章全体の意図\n",
526
- "\n",
527
- "この文章は、作品における重要な敵役である「悪役」のキャラクター像について、その能力、性格、行動パターンなどが詳細に描写されています。読者にこのキャラクターの恐ろしさや危険性を理解させ、物語における彼の重要性を示すことが目的です。\n",
528
- "\n",
529
- "**例えと類推**\n",
530
- "\n",
531
- "* この悪役は、まるで古代の王のように、絶対的な力で鬼たちを支配しています。\n",
532
- "* このキャラクターの行動は、まるで暴君のように、残酷で予測不可能です。\n",
533
- "\n",
534
- "**専門用語の定義**\n",
535
- "\n",
536
- "* **鬼**: 人間を襲い、血を吸って生きる超自然的な存在。\n",
537
- "* **下弦の鬼**: 鬼の中でも比較的弱い存在。\n",
538
- "* **猗窩座**: 悪役に従う、上位の鬼の一人。\n",
539
- "* **珠世**: 主人公の味方となる、鬼殺隊の医者。\n",
540
- "\n",
541
- "**文章の含意**\n",
542
- "\n",
543
- "この悪役は、単なる力を持った敵ではなく、内面的な不安や恐怖を抱えた複雑な人物であることが示唆されています。その複雑さは、彼の行動の予測不能さを生み出し、物語に深みを与えていると考えられます。\n",
544
- "</explanation> \n",
545
- "<explanation>\n",
546
- "## 文章の主要ポイントと解説\n",
547
- "\n",
548
- "この文章は、ある作品に登場する「悪役」のキャラクターについて説明しています。以下に主要なポイントを箇条書きでまとめ、それぞれのポイントを詳しく解説します。\n",
549
- "\n",
550
- "**1. 悪役の正体と能力**\n",
551
- "\n",
552
- "* **千年以上前に生まれた最初の鬼**: このキャラクターは、鬼という種族の始祖であり、非常に古い存在であることがわかります。\n",
553
- "* **鬼達の絶対的支配者**: 他の鬼たちを支配し、彼らを従わせる力を持っています。\n",
554
- "* **自身の血を人間に与え大量の鬼を作り出した**: このキャラクターは、自分の血を人間に与えることで、人間を鬼に変えることができるという力を持っています。\n",
555
- "* **炭治郎の家族を殺し、禰󠄀豆子を鬼に変えた仇である**: 主人公の家族を殺害し、妹を鬼に変えたという、物語における大きな敵役であることがわかります。\n",
556
- "* **不死身の鬼を殺すことができる**: 鬼は通常、不死身であると考えられていますが、このキャラクターはそれを殺せるほどの力を持っているようです。\n",
557
- "\n",
558
- "**2. 悪役の支配力と性格** \n",
559
- "\n",
560
- "* **鬼達を血に仕込んだ呪いで支配し、「あの方」と呼ばれ恐れられている**: このキャラクターは、鬼達を呪いによって支配しており、彼らから「あの方」と呼ばれ、深い恐怖を抱かれています。\n",
561
- "* **外見や攻撃は自由自在**: このキャラクターは自分の外見や攻撃方法を自由に変化させることができるようです。\n",
562
- "* **冷酷非情かつ支配的**: 自分の意志に従わない者は容赦なく殺し、支配欲が非常に強い人物であることがわかります。\n",
563
- "* **下弦の鬼達を惨殺したり、猗窩座に理不尽な叱責を与える**: 下位の鬼を簡単に殺したり、部下であるはずの鬼に対して理不尽な怒りをぶつけたりするなど、残酷で自己中心的であることがわかります。\n",
564
- "* **珠世からはその人物像を「いつも何かに怯えている臆病者」と皮肉られている**: このキャラクターは、強い力を持っていても、内心では何かに怯えているのではないかと推測されています。\n",
565
- "\n",
566
- "**3. 悪役の行動パターン**\n",
567
- "\n",
568
- "* **癇癪で暴力を振るったり、自分を棚に上げた言動をする**: 感情が激しく、自分の都合の良いように振る舞うことがあります。\n",
569
- "\n",
570
- "## 文章全体の意図\n",
571
- "\n",
572
- "この文章は、作品における重要な敵役である「悪役」のキャラクター像について、その能力、性格、行動パターンなどが詳細に描写されています。読者にこのキャラクターの恐ろしさや危険性を理解させ、物語における彼の重要性を示すことが目的です。\n",
573
- "\n",
574
- "**例えと類推**\n",
575
- "\n",
576
- "* この悪役は、まるで古代の王のように、絶対的な力で鬼たちを支配しています。\n",
577
- "* このキャラクターの行動は、まるで暴君のように、残酷で予測不可能です。\n",
578
- "\n",
579
- "**専門用語の定義**\n",
580
- "\n",
581
- "* **鬼**: 人間を襲い、血を吸って生きる超自然的な存在。\n",
582
- "* **下弦の鬼**: 鬼の中でも比較的弱い存在。\n",
583
- "* **猗窩座**: 悪役に従う、上位の鬼の一人。\n",
584
- "* **珠世**: 主人公の味方となる、鬼殺隊の医者。\n",
585
- "\n",
586
- "**文章の含意**\n",
587
- "\n",
588
- "この悪役は、単なる力を持った敵ではなく、内面的な不安や恐怖を抱えた複雑な人物であることが示唆されています。その複雑さは、彼の行動の予測不能さを生み出し、物語に深みを与えていると考えられます。\n",
589
- "</explanation> \n",
590
- " {'id': '4fd39e75-3315-412f-99d3-c26595b98096', 'workflow_id': '68e01ad3-429c-475b-b33b-a7fe8958160d', 'sequence_number': 792, 'status': 'succeeded', 'outputs': {'result': '<explanation>\\n## 文章の主要ポイントと解説\\n\\nこの文章は、ある作品に登場する「悪役」のキャラクターについて説明しています。以下に主要なポイントを箇条書きでまとめ、それぞれのポイントを詳しく解説します。\\n\\n**1. 悪役の正体と能力**\\n\\n* **千年以上前に生まれた最初の鬼**: このキャラクターは、鬼という種族の始祖であり、非常に古い存在であることがわかります。\\n* **鬼達の絶対的支配者**: 他の鬼たちを支配し、彼らを従わせる力を持っています。\\n* **自身の血を人間に与え大量の鬼を作り出した**: このキャラクターは、自分の血を人間に与えることで、人間を鬼に変えることができるという力を持っています。\\n* **炭治郎の家族を殺し、禰󠄀豆子を鬼に変えた仇である**: 主人公の家族を殺害し、妹を鬼に変えたという、物語における大きな敵役であることがわかります。\\n* **不死身の鬼を殺すことができる**: 鬼は通常、不死身であると考えられていますが、このキャラクターはそれを殺せるほどの力を持っているようです。\\n\\n**2. 悪役の支配力と性格** \\n\\n* **鬼達を血に仕込んだ呪いで支配し、「あの方」と呼ばれ恐れられている**: このキャラクターは、鬼達を呪いによって支配しており、彼らから「あの方」と呼ばれ、深い恐怖を抱かれています。\\n* **外見や攻撃は自由��在**: このキャラクターは自分の外見や攻撃方法を自由に変化させることができるようです。\\n* **冷酷非情かつ支配的**: 自分の意志に従わない者は容赦なく殺し、支配欲が非常に強い人物であることがわかります。\\n* **下弦の鬼達を惨殺したり、猗窩座に理不尽な叱責を与える**: 下位の鬼を簡単に殺したり、部下であるはずの鬼に対して理不尽な怒りをぶつけたりするなど、残酷で自己中心的であることがわかります。\\n* **珠世からはその人物像を「いつも何かに怯えている臆病者」と皮肉られている**: このキャラクターは、強い力を持っていても、内心では何かに怯えているのではないかと推測されています。\\n\\n**3. 
悪役の行動パターン**\\n\\n* **癇癪で暴力を振るったり、自分を棚に上げた言動をする**: 感情が激しく、自分の都合の良いように振る舞うことがあります。\\n\\n## 文章全体の意図\\n\\nこの文章は、作品における重要な敵役である「悪役」のキャラクター像について、その能力、性格、行動パターンなどが詳細に描写されています。読者にこのキャラクターの恐ろしさや危険性を理解させ、物語における彼の重要性を示すことが目的です。\\n\\n**例えと類推**\\n\\n* この悪役は、まるで古代の王のように、絶対的な力で鬼たちを支配しています。\\n* このキャラクターの行動は、まるで暴君のように、残酷で予測不可能です。\\n\\n**専門用語の定義**\\n\\n* **鬼**: 人間を襲い、血を吸って生きる超自然的な存在。\\n* **下弦の鬼**: 鬼の中でも比較的弱い存在。\\n* **猗窩座**: 悪役に従う、上位の鬼の一人。\\n* **珠世**: 主人公の味方となる、鬼殺隊の医者。\\n\\n**文章の含意**\\n\\nこの悪役は、単なる力を持った敵ではなく、内面的な不安や恐怖を抱えた複雑な人物であることが示唆されています。その複雑さは、彼の行動の予測不能さを生み出し、物語に深みを与えていると考えられます。\\n</explanation> \\n', 'raw_content': '\\n 本作の悪役。千年以上前に生まれた最初の鬼。鬼達の絶対的支配者で、自身の血を人間に与え大量の鬼を作り出した。炭治郎の家族を殺し、禰󠄀豆子を鬼に変えた仇である。\\n鬼達を血に仕込んだ呪いで支配し、「あの方」と呼ばれ恐れられている。外見や攻撃は自由自在で、不死身の鬼を殺すことができる。性格は冷酷非情かつ支配的で、自らの意志に沿わない者は決して許さず、忠実に従っていた下弦の鬼達を些細なことで何ら躊躇なく惨殺したり、報告に来た猗窩座に理不尽な叱責を与えるなどしている。珠世からはその人物像を「いつも何かに怯えている臆病者」と皮肉られている。癇癪で暴力を振るったり、自分を棚に上げた言動をすることも多い。\\n \\n ', 'url': ''}, 'error': None, 'elapsed_time': 15.112371566006914, 'total_tokens': 1021, 'total_steps': 5, 'created_by': {'id': 'f3770b11-c8be-460f-89fe-67ab0166431d', 'user': 'abc_123'}, 'created_at': 1716941559, 'finished_at': 1716941574, 'files': []}\n"
591
- ]
592
- }
593
- ]
594
- },
595
- {
596
- "cell_type": "code",
597
- "source": [
598
- "import gradio as gr\n",
599
- "import requests\n",
600
- "from langchain_community.document_loaders import UnstructuredPDFLoader\n",
601
- "import json\n",
602
- "\n",
603
- "def run_workflow(message):\n",
604
- " try:\n",
605
- " file = message['files'][0]\n",
606
- " text_message = message['text']\n",
607
- "\n",
608
- " # PDFファイルが選択されているかチェック\n",
609
- " if not file:\n",
610
- " return \"PDFファイルを選択してください。\", \"\"\n",
611
- "\n",
612
- " # PDFファイルをロードしてテキストを抽出\n",
613
- " loader = UnstructuredPDFLoader(file)\n",
614
- " data = loader.load()\n",
615
- " raw_text = data[0].page_content\n",
616
- "\n",
617
- " # APIリクエストのための入力データを準備\n",
618
- " inputs = {\n",
619
- " \"url\": \"\",\n",
620
- " \"knowledge\": raw_text\n",
621
- " }\n",
622
- "\n",
623
- " yield raw_text, \"loading...\", {}\n",
624
- "\n",
625
- " # APIエンドポイントURL\n",
626
- " url = DIFY_BASE_URL + \"/workflows/run\"\n",
627
- "\n",
628
- " # APIリクエストのヘッダー\n",
629
- " headers = {\n",
630
- " \"Content-Type\": \"application/json\",\n",
631
- " \"Authorization\": f\"Bearer {DIFY_API_KEY_MYWORKFLOW}\"\n",
632
- " }\n",
633
- "\n",
634
- " # APIリクエストのデータ\n",
635
- " data = {\n",
636
- " \"inputs\": inputs,\n",
637
- " \"query\": \"\",\n",
638
- " \"response_mode\": \"streaming\",\n",
639
- " \"user\": \"abc_123\",\n",
640
- " }\n",
641
- "\n",
642
- " # APIにリクエストを送信\n",
643
- " response = requests.post(url, headers=headers, json=data, stream=True)\n",
644
- " response.raise_for_status()\n",
645
- "\n",
646
- " assistant_message = \"\"\n",
647
- " outputs = {}\n",
648
- "\n",
649
- " # APIレスポンスのチャンク処理\n",
650
- " for chunk in response.iter_lines(delimiter=b\"\\n\\n\"):\n",
651
- " if chunk:\n",
652
- " chunk_data = chunk.decode(\"utf-8\").strip()\n",
653
- " if chunk_data.startswith(\"data:\"):\n",
654
- " json_data = chunk_data[6:] # \"data: \"を取り除く\n",
655
- " if json_data:\n",
656
- " result = json.loads(json_data)\n",
657
- " if result.get(\"event\") == \"text_chunk\":\n",
658
- " answer = result.get(\"data\", \"\").get(\"text\", \"\")\n",
659
- " assistant_message += str(answer)\n",
660
- " yield raw_text, assistant_message, result.get(\"data\", \"\")\n",
661
- " elif result.get(\"event\") == \"workflow_finished\":\n",
662
- " outputs = result.get('data', \"\")\n",
663
- " yield raw_text, assistant_message, outputs\n",
664
- "\n",
665
- " except Exception as e:\n",
666
- " error_message = str(e)\n",
667
- " print(f\"Error: {error_message}\")\n",
668
- " return \"error\", error_message, {}\n",
669
- "\n",
670
- "# Gradioインターフェイスの設定\n",
671
- "iface = gr.Interface(\n",
672
- " fn=run_workflow,\n",
673
- " inputs=[gr.MultimodalTextbox(label=\"PDFファイルをアップロード\", file_types=[\".pdf\"], interactive=True)],\n",
674
- " outputs=[\n",
675
- " gr.Textbox(label=\"生テキスト\", show_copy_button=True, max_lines=5),\n",
676
- " gr.Markdown(),\n",
677
- " gr.JSON()\n",
678
- " ],\n",
679
- " title=\"PDF to Dify Workflow\",\n",
680
- " description=\"PDFファイルを入力すると、Dify APIのワークフローによって処理された結果が表示されます。\",\n",
681
- " article=\"\"\"\n",
682
- "\n",
683
- " © 2024 @tregu0458. All rights reserved.\n",
684
- "\n",
685
- " ## 使用コンポーネント\n",
686
- " - dify\n",
687
- " - gradio\n",
688
- " - langchain_community.document_loaders\n",
689
- "\n",
690
- " ## 今回のworkflowの仕様\n",
691
- " ### 入力\n",
692
- " - url\n",
693
- " - knowledge\n",
694
- " ### 出力\n",
695
- " - result\n",
696
- " - row_content\n",
697
- " - url\n",
698
- " ### LLM\n",
699
- " - gemini-1.5-flash\n",
700
- " ```\n",
701
- " PDFファイルを入力として受け取り、Dify APIのワークフローを使用してファイルを処理し、結果を返す関数。\n",
702
- " Args:\n",
703
- " message (dict): 入力メッセージ。以下のキーを含む辞書。\n",
704
- " - 'files' (list): アップロードされたPDFファイルのリスト。\n",
705
- " - 'text' (str): テキストメッセージ。\n",
706
- " Yields:\n",
707
- " tuple: 以下の要素を含むタプル。\n",
708
- " - raw_text (str): PDFファイルから抽出された生テキスト。\n",
709
- " - assistant_message (str): アシスタントからのメッセージ。\n",
710
- " - outputs (dict): APIレスポンスのデータ。\n",
711
- " Returns:\n",
712
- " tuple: 以下の要素を含むタプル。\n",
713
- " - status (str): 処理の状態。\"error\" または \"\" (空文字列)。\n",
714
- " - error_message (str): エラーメッセージ (エラーが発生した場合)。\n",
715
- " - data (dict): APIレスポンスのデータ。\n",
716
- " Raises:\n",
717
- " Exception: 処理中にエラーが発生した場合。\n",
718
- " Notes:\n",
719
- " - 関数は非同期的に実行され、処理の進行状況に応じて段階的に結果を返す。\n",
720
- " - `yield` を使用して、処理の途中経過を表示しながら、最終的な結果を返す。\n",
721
- " ```\n",
722
- " \"\"\"\n",
723
- ")\n",
724
- "\n",
725
- "if __name__ == \"__main__\":\n",
726
- " iface.queue().launch()"
727
- ],
728
- "metadata": {
729
- "colab": {
730
- "base_uri": "https://localhost:8080/",
731
- "height": 631
732
- },
733
- "id": "yx9f1RwJtFi9",
734
- "outputId": "303ff661-cf45-4fd1-f5c3-650b3526ebaf"
735
- },
736
- "execution_count": 7,
737
- "outputs": [
738
- {
739
- "output_type": "stream",
740
- "name": "stdout",
741
- "text": [
742
- "Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).\n",
743
- "\n",
744
- "Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n",
745
- "Running on public URL: https://3e94710286284f59a9.gradio.live\n",
746
- "\n",
747
- "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n"
748
- ]
749
- },
750
- {
751
- "output_type": "display_data",
752
- "data": {
753
- "text/plain": [
754
- "<IPython.core.display.HTML object>"
755
- ],
756
- "text/html": [
757
- "<div><iframe src=\"https://3e94710286284f59a9.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
758
- ]
759
- },
760
- "metadata": {}
761
- }
762
- ]
763
- }
764
- ]
765
- }