Spaces:
Runtime error
Runtime error
File size: 10,725 Bytes
daa68f6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 |
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "2RCxpRzpqPrB"
},
"outputs": [],
"source": [
"!pip install gradio unstructured[pdf] langchain-community"
]
},
{
"cell_type": "code",
"source": [
"import getpass\n",
"\n",
"DIFY_BASE_URL = getpass.getpass(\"DIFY_BASE_URL:\")\n",
"DIFY_API_KEY_MYWORKFLOW = getpass.getpass(\"DIFY_API_KEY_MYWORKFLOW: \")\n",
"\n"
],
"metadata": {
"id": "iDbRDVzHqZh8"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import requests\n",
"import json\n",
"url = DIFY_BASE_URL + \"/workflows/run\"\n",
"\n",
"headers = {\n",
" \"Content-Type\": \"application/json\",\n",
" \"Authorization\": f\"Bearer {DIFY_API_KEY_MYWORKFLOW}\"\n",
"}\n",
"\n",
"data = {\n",
" \"inputs\": {\n",
" \"knowledge\":\"\"\"\n",
" 本作の悪役。千年以上前に生まれた最初の鬼。鬼達の絶対的支配者で、自身の血を人間に与え大量の鬼を作り出した。炭治郎の家族を殺し、禰󠄀豆子を鬼に変えた仇である。\n",
"鬼達を血に仕込んだ呪いで支配し、「あの方」と呼ばれ恐れられている。外見や攻撃は自由自在で、不死身の鬼を殺すことができる。性格は冷酷非情かつ支配的で、自らの意志に沿わない者は決して許さず、忠実に従っていた下弦の鬼達を些細なことで何ら躊躇なく惨殺したり、報告に来た猗窩座に理不尽な叱責を与えるなどしている。珠世からはその人物像を「いつも何かに怯えている臆病者」と皮肉られている。癇癪で暴力を振るったり、自分を棚に上げた言動をすることも多い。\n",
"\n",
" \"\"\",\n",
" },\n",
" \"query\": \"\", # クエリ(オプション)\n",
" \"response_mode\": \"streaming\", # ストリーミング応答\n",
" \"user\": \"abc_123\", # ユーザーID\n",
"}\n",
"\n",
"response = requests.post(url, headers=headers, json=data, stream=True)\n",
"\n",
"response.raise_for_status() # エラーチェック\n",
"\n",
"assistant_message = \"\"\n",
"outputs = {}\n",
"\n",
"# APIレスポンスのチャンク処理\n",
"for chunk in response.iter_lines(delimiter=b\"\\n\\n\"):\n",
" if chunk:\n",
" chunk_data = chunk.decode(\"utf-8\").strip()\n",
" if chunk_data.startswith(\"data:\"):\n",
" json_data = chunk_data[len(\"data:\"):].strip() # \"data: \"を取り除く\n",
" if json_data:\n",
" result = json.loads(json_data)\n",
" if result.get(\"event\") == \"text_chunk\":\n",
" answer = (result.get(\"data\") or {}).get(\"text\", \"\")\n",
" assistant_message += str(answer)\n",
" print(str(answer), end=\"\", flush=True)\n",
" elif result.get(\"event\") == \"workflow_finished\":\n",
" outputs = result.get('data', \"\")\n",
" print(assistant_message, outputs)"
],
"metadata": {
"id": "_1LzsFX7rve1"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import gradio as gr\n",
"import requests\n",
"from langchain_community.document_loaders import UnstructuredPDFLoader\n",
"import json\n",
"\n",
"def run_workflow(message):\n",
" try:\n",
" files = message['files']\n",
" text_message = message['text']\n",
"\n",
" # PDFファイルが選択されているかチェック\n",
" if not files:\n",
" yield \"PDFファイルを選択してください。\", \"\", {}\n",
" return\n",
"\n",
" file = files[0]\n",
"\n",
" # PDFファイルをロードしてテキストを抽出\n",
" loader = UnstructuredPDFLoader(file)\n",
" data = loader.load()\n",
" raw_text = data[0].page_content\n",
"\n",
" # APIリクエストのための入力データを準備\n",
" inputs = {\n",
" \"knowledge\": raw_text\n",
" }\n",
"\n",
" yield raw_text, \"loading...\", {}\n",
"\n",
" # APIエンドポイントURL\n",
" url = DIFY_BASE_URL + \"/workflows/run\"\n",
"\n",
" # APIリクエストのヘッダー\n",
" headers = {\n",
" \"Content-Type\": \"application/json\",\n",
" \"Authorization\": f\"Bearer {DIFY_API_KEY_MYWORKFLOW}\"\n",
" }\n",
"\n",
" # APIリクエストのデータ\n",
" data = {\n",
" \"inputs\": inputs,\n",
" \"query\": \"\",\n",
" \"response_mode\": \"streaming\",\n",
" \"user\": \"abc_123\",\n",
" }\n",
"\n",
" # APIにリクエストを送信\n",
" response = requests.post(url, headers=headers, json=data, stream=True)\n",
" response.raise_for_status()\n",
"\n",
" assistant_message = \"\"\n",
" outputs = {}\n",
"\n",
" # APIレスポンスのチャンク処理\n",
" for chunk in response.iter_lines(delimiter=b\"\\n\\n\"):\n",
" if chunk:\n",
" chunk_data = chunk.decode(\"utf-8\").strip()\n",
" if chunk_data.startswith(\"data:\"):\n",
" json_data = chunk_data[len(\"data:\"):].strip() # \"data: \"を取り除く\n",
" if json_data:\n",
" result = json.loads(json_data)\n",
" if result.get(\"event\") == \"text_chunk\":\n",
" answer = (result.get(\"data\") or {}).get(\"text\", \"\")\n",
" assistant_message += str(answer)\n",
" yield raw_text, assistant_message, result.get(\"data\", \"\")\n",
" elif result.get(\"event\") == \"workflow_finished\":\n",
" outputs = result.get('data', \"\")\n",
" yield raw_text, assistant_message, outputs\n",
"\n",
" except Exception as e:\n",
" error_message = str(e)\n",
" print(f\"Error: {error_message}\")\n",
" yield \"error\", error_message, {}\n",
"\n",
"# Gradioインターフェイスの設定\n",
"iface = gr.Interface(\n",
" fn=run_workflow,\n",
" inputs=[gr.MultimodalTextbox(label=\"PDFファイルをアップロード\", file_types=[\".pdf\"], interactive=True)],\n",
" outputs=[\n",
" gr.Textbox(label=\"生テキスト\", show_copy_button=True, max_lines=5),\n",
" gr.Markdown(),\n",
" gr.JSON()\n",
" ],\n",
" title=\"PDF to Dify Workflow\",\n",
" description=\"PDFファイルを入力すると、Dify APIのワークフローによって処理された結果が表示されます。\",\n",
" article=\"\"\"\n",
"\n",
" © 2024 @tregu0458. All rights reserved.\n",
"\n",
" ## 使用コンポーネント\n",
" - dify\n",
" - gradio\n",
" - langchain_community.document_loaders\n",
"\n",
" ## 今回のworkflowの仕様\n",
" ### 入力\n",
" - knowledge\n",
" ### 出力\n",
" - result\n",
" - row_content\n",
" ### LLM\n",
" - gemini-1.5-flash\n",
" ```\n",
" PDFファイルを入力として受け取り、Dify APIのワークフローを使用してファイルを処理し、結果を返す関数。\n",
" Args:\n",
" message (dict): 入力メッセージ。以下のキーを含む辞書。\n",
" - 'files' (list): アップロードされたPDFファイルのリスト。\n",
" - 'text' (str): テキストメッセージ。\n",
" Yields:\n",
" tuple: 以下の要素を含むタプル。\n",
" - raw_text (str): PDFファイルから抽出された生テキスト。\n",
" - assistant_message (str): アシスタントからのメッセージ。\n",
" - outputs (dict): APIレスポンスのデータ。\n",
" Returns:\n",
" tuple: 以下の要素を含むタプル。\n",
" - status (str): 処理の状態。\"error\" または \"\" (空文字列)。\n",
" - error_message (str): エラーメッセージ (エラーが発生した場合)。\n",
" - data (dict): APIレスポンスのデータ。\n",
" Raises:\n",
" Exception: 処理中にエラーが発生した場合。\n",
" Notes:\n",
" - 関数は非同期的に実行され、処理の進行状況に応じて段階的に結果を返す。\n",
" - `yield` を使用して、処理の途中経過を表示しながら、最終的な結果を返す。\n",
" ```\n",
" \"\"\"\n",
")\n",
"\n",
"if __name__ == \"__main__\":\n",
" iface.queue().launch()"
],
"metadata": {
"id": "yx9f1RwJtFi9"
},
"execution_count": null,
"outputs": []
}
]
} |