Spaces:
Runtime error
Runtime error
Commit ·
3291faa
1
Parent(s): 875054f
Upload 11 files
Browse files- app.py +18 -51
- common.py +142 -20
- log.py +5 -0
- pages/Chatbot.py +5 -29
- pages/ChatbotWebRead.py +20 -21
- pages/ImportExcelFile.py +4 -63
- pages/ImportFile.py +3 -65
- pages/ImportPdfFile.py +24 -0
- pages/ImportPptxFile.py +3 -62
- pages/ImportWordFile.py +24 -0
- requirements.txt +9 -1
app.py
CHANGED
|
@@ -2,28 +2,23 @@ import streamlit as st
|
|
| 2 |
import os
|
| 3 |
import pickle
|
| 4 |
import faiss
|
| 5 |
-
import
|
| 6 |
|
| 7 |
from multiprocessing import Lock
|
| 8 |
from multiprocessing.managers import BaseManager
|
| 9 |
from llama_index.callbacks import CallbackManager, LlamaDebugHandler
|
| 10 |
-
from llama_index import VectorStoreIndex,
|
| 11 |
-
from llama_index.chat_engine import CondenseQuestionChatEngine;
|
| 12 |
from llama_index.node_parser import SimpleNodeParser
|
| 13 |
from llama_index.langchain_helpers.text_splitter import TokenTextSplitter
|
| 14 |
from llama_index.constants import DEFAULT_CHUNK_OVERLAP
|
| 15 |
-
from llama_index.response_synthesizers import get_response_synthesizer
|
| 16 |
from llama_index.vector_stores.faiss import FaissVectorStore
|
| 17 |
from llama_index.graph_stores import SimpleGraphStore
|
| 18 |
from llama_index.storage.docstore import SimpleDocumentStore
|
| 19 |
from llama_index.storage.index_store import SimpleIndexStore
|
| 20 |
from msal_streamlit_authentication import msal_authentication
|
| 21 |
import tiktoken
|
| 22 |
-
|
| 23 |
-
from requests_oauthlib import OAuth2Session
|
| 24 |
-
from time import time
|
| 25 |
from dotenv import load_dotenv
|
| 26 |
-
from streamlit import net_util
|
| 27 |
|
| 28 |
load_dotenv()
|
| 29 |
|
|
@@ -40,44 +35,27 @@ AUTHORITY = f"https://login.microsoftonline.com/{TENANT_ID}"
|
|
| 40 |
REDIRECT_URI = os.environ["REDIRECT_URI"]
|
| 41 |
SCOPES = ["openid", "profile", "User.Read"]
|
| 42 |
|
| 43 |
-
index_name =
|
| 44 |
-
pkl_name =
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
以下はこれまでの会話履歴と、ドキュメントを検索して回答する必要がある、ユーザーからの会話文です。
|
| 48 |
-
会話と新しい会話文に基づいて、検索クエリを作成します。回答は日本語で行います。
|
| 49 |
-
新しい会話文が挨拶の場合、挨拶を返してください。
|
| 50 |
-
新しい会話文が質問の場合、検索した結果の回答を返してください。
|
| 51 |
-
答えがわからない場合は正直にわからないと回答してください。
|
| 52 |
-
会話履歴:
|
| 53 |
-
{chat_history}
|
| 54 |
-
新しい会話文:
|
| 55 |
-
{question}
|
| 56 |
-
Search query:
|
| 57 |
-
""")
|
| 58 |
-
|
| 59 |
-
chat_history = []
|
| 60 |
-
|
| 61 |
-
logging.basicConfig(level=logging.INFO)
|
| 62 |
-
logger = logging.getLogger("__name__")
|
| 63 |
-
logger.debug("調査用ログ")
|
| 64 |
|
| 65 |
def initialize_index():
|
| 66 |
logger.info("initialize_index start")
|
| 67 |
-
text_splitter = TokenTextSplitter(
|
| 68 |
, chunk_overlap=DEFAULT_CHUNK_OVERLAP
|
| 69 |
-
, tokenizer=tiktoken.encoding_for_model("gpt-
|
| 70 |
node_parser = SimpleNodeParser(text_splitter=text_splitter)
|
| 71 |
d = 1536
|
| 72 |
k=2
|
| 73 |
faiss_index = faiss.IndexFlatL2(d)
|
| 74 |
# デバッグ用
|
| 75 |
-
|
| 76 |
-
callback_manager = CallbackManager([llama_debug_handler])
|
| 77 |
service_context = ServiceContext.from_defaults(node_parser=node_parser,callback_manager=callback_manager)
|
| 78 |
lock = Lock()
|
| 79 |
with lock:
|
| 80 |
if os.path.exists(index_name):
|
|
|
|
| 81 |
storage_context = StorageContext.from_defaults(
|
| 82 |
docstore=SimpleDocumentStore.from_persist_dir(persist_dir=index_name),
|
| 83 |
graph_store=SimpleGraphStore.from_persist_dir(persist_dir=index_name),
|
|
@@ -85,29 +63,17 @@ def initialize_index():
|
|
| 85 |
index_store=SimpleIndexStore.from_persist_dir(persist_dir=index_name),
|
| 86 |
)
|
| 87 |
st.session_state.index = load_index_from_storage(storage_context=storage_context,service_context=service_context)
|
| 88 |
-
|
| 89 |
-
st.session_state.query_engine = st.session_state.index.as_query_engine(response_synthesizer=response_synthesizer,service_context=service_context)
|
| 90 |
-
st.session_state.chat_engine = CondenseQuestionChatEngine.from_defaults(
|
| 91 |
-
query_engine=st.session_state.query_engine,
|
| 92 |
-
condense_question_prompt=custom_prompt,
|
| 93 |
-
chat_history=chat_history,
|
| 94 |
-
verbose=True
|
| 95 |
-
)
|
| 96 |
else:
|
|
|
|
| 97 |
documents = SimpleDirectoryReader("./documents").load_data()
|
| 98 |
vector_store = FaissVectorStore(faiss_index=faiss_index)
|
| 99 |
storage_context = StorageContext.from_defaults(vector_store=vector_store)
|
| 100 |
st.session_state.index = VectorStoreIndex.from_documents(documents, storage_context=storage_context,service_context=service_context)
|
| 101 |
st.session_state.index.storage_context.persist(persist_dir=index_name)
|
| 102 |
-
|
| 103 |
-
st.session_state.query_engine = st.session_state.index.as_query_engine(response_synthesizer=response_synthesizer,service_context=service_context)
|
| 104 |
-
st.session_state.chat_engine = CondenseQuestionChatEngine.from_defaults(
|
| 105 |
-
query_engine=st.session_state.query_engine,
|
| 106 |
-
condense_question_prompt=custom_prompt,
|
| 107 |
-
chat_history=chat_history,
|
| 108 |
-
verbose=True
|
| 109 |
-
)
|
| 110 |
if os.path.exists(pkl_name):
|
|
|
|
| 111 |
with open(pkl_name, "rb") as f:
|
| 112 |
st.session_state.stored_docs = pickle.load(f)
|
| 113 |
else:
|
|
@@ -139,8 +105,9 @@ st.session_state["login_token"] = msal_authentication(
|
|
| 139 |
html_id="html_id_for_button", # Optional, defaults to None. Corresponds to HTML id.
|
| 140 |
#key=1 # Optional if only a single instance is needed
|
| 141 |
)
|
| 142 |
-
st.write("Recevied login token:", st.session_state.login_token)
|
| 143 |
|
| 144 |
if st.session_state.login_token:
|
| 145 |
initialize_index()
|
| 146 |
-
st.write("ようこそ", st.session_state.login_token["account"]["name"])
|
|
|
|
|
|
| 2 |
import os
|
| 3 |
import pickle
|
| 4 |
import faiss
|
| 5 |
+
import common
|
| 6 |
|
| 7 |
from multiprocessing import Lock
|
| 8 |
from multiprocessing.managers import BaseManager
|
| 9 |
from llama_index.callbacks import CallbackManager, LlamaDebugHandler
|
| 10 |
+
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext, StorageContext, load_index_from_storage
|
|
|
|
| 11 |
from llama_index.node_parser import SimpleNodeParser
|
| 12 |
from llama_index.langchain_helpers.text_splitter import TokenTextSplitter
|
| 13 |
from llama_index.constants import DEFAULT_CHUNK_OVERLAP
|
|
|
|
| 14 |
from llama_index.vector_stores.faiss import FaissVectorStore
|
| 15 |
from llama_index.graph_stores import SimpleGraphStore
|
| 16 |
from llama_index.storage.docstore import SimpleDocumentStore
|
| 17 |
from llama_index.storage.index_store import SimpleIndexStore
|
| 18 |
from msal_streamlit_authentication import msal_authentication
|
| 19 |
import tiktoken
|
| 20 |
+
from llama_index.callbacks import CallbackManager, LlamaDebugHandler
|
|
|
|
|
|
|
| 21 |
from dotenv import load_dotenv
|
|
|
|
| 22 |
|
| 23 |
load_dotenv()
|
| 24 |
|
|
|
|
| 35 |
REDIRECT_URI = os.environ["REDIRECT_URI"]
|
| 36 |
SCOPES = ["openid", "profile", "User.Read"]
|
| 37 |
|
| 38 |
+
index_name = os.environ["INDEX_NAME"]
|
| 39 |
+
pkl_name = os.environ["PKL_NAME"]
|
| 40 |
+
st.session_state.llama_debug_handler = LlamaDebugHandler()
|
| 41 |
+
from log import logger
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
def initialize_index():
|
| 44 |
logger.info("initialize_index start")
|
| 45 |
+
text_splitter = TokenTextSplitter(chunk_size=1500
|
| 46 |
, chunk_overlap=DEFAULT_CHUNK_OVERLAP
|
| 47 |
+
, tokenizer=tiktoken.encoding_for_model("gpt-4").encode)
|
| 48 |
node_parser = SimpleNodeParser(text_splitter=text_splitter)
|
| 49 |
d = 1536
|
| 50 |
k=2
|
| 51 |
faiss_index = faiss.IndexFlatL2(d)
|
| 52 |
# デバッグ用
|
| 53 |
+
callback_manager = CallbackManager([st.session_state.llama_debug_handler])
|
|
|
|
| 54 |
service_context = ServiceContext.from_defaults(node_parser=node_parser,callback_manager=callback_manager)
|
| 55 |
lock = Lock()
|
| 56 |
with lock:
|
| 57 |
if os.path.exists(index_name):
|
| 58 |
+
logger.info("start import index")
|
| 59 |
storage_context = StorageContext.from_defaults(
|
| 60 |
docstore=SimpleDocumentStore.from_persist_dir(persist_dir=index_name),
|
| 61 |
graph_store=SimpleGraphStore.from_persist_dir(persist_dir=index_name),
|
|
|
|
| 63 |
index_store=SimpleIndexStore.from_persist_dir(persist_dir=index_name),
|
| 64 |
)
|
| 65 |
st.session_state.index = load_index_from_storage(storage_context=storage_context,service_context=service_context)
|
| 66 |
+
common.setChatEngine()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
else:
|
| 68 |
+
logger.info("start create index")
|
| 69 |
documents = SimpleDirectoryReader("./documents").load_data()
|
| 70 |
vector_store = FaissVectorStore(faiss_index=faiss_index)
|
| 71 |
storage_context = StorageContext.from_defaults(vector_store=vector_store)
|
| 72 |
st.session_state.index = VectorStoreIndex.from_documents(documents, storage_context=storage_context,service_context=service_context)
|
| 73 |
st.session_state.index.storage_context.persist(persist_dir=index_name)
|
| 74 |
+
common.setChatEngine()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
if os.path.exists(pkl_name):
|
| 76 |
+
logger.info(pkl_name)
|
| 77 |
with open(pkl_name, "rb") as f:
|
| 78 |
st.session_state.stored_docs = pickle.load(f)
|
| 79 |
else:
|
|
|
|
| 105 |
html_id="html_id_for_button", # Optional, defaults to None. Corresponds to HTML id.
|
| 106 |
#key=1 # Optional if only a single instance is needed
|
| 107 |
)
|
| 108 |
+
# st.write("Recevied login token:", st.session_state.login_token)
|
| 109 |
|
| 110 |
if st.session_state.login_token:
|
| 111 |
initialize_index()
|
| 112 |
+
st.write("ようこそ", st.session_state.login_token["account"]["name"])
|
| 113 |
+
st.write("サイドメニューからファイルインポート又はChatbotへの質問を開始してください。")
|
common.py
CHANGED
|
@@ -1,19 +1,24 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
-
import logging
|
| 3 |
import os
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
-
from
|
| 6 |
-
from requests_oauthlib import OAuth2Session
|
| 7 |
-
|
| 8 |
-
from time import time
|
| 9 |
-
# from requests_oauthlib import OAuth2Session
|
| 10 |
from streamlit import runtime
|
| 11 |
from streamlit.runtime.scriptrunner import get_script_run_ctx
|
| 12 |
-
import ipaddress
|
| 13 |
from streamlit.web.server.websocket_headers import _get_websocket_headers
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
# 接続元制御
|
| 19 |
ALLOW_IP_ADDRESS = os.environ["ALLOW_IP_ADDRESS"]
|
|
@@ -31,21 +36,14 @@ SCOPES = ["openid", "profile", "User.Read"]
|
|
| 31 |
# 接続元IP取得
|
| 32 |
def get_remote_ip():
|
| 33 |
ctx = get_script_run_ctx()
|
| 34 |
-
logger.info("ctx")
|
| 35 |
-
logger.info(ctx)
|
| 36 |
session_info = runtime.get_instance().get_client(ctx.session_id)
|
| 37 |
-
logger.info("session_info")
|
| 38 |
-
logger.info(session_info)
|
| 39 |
headers = _get_websocket_headers()
|
| 40 |
-
logger.info("headers")
|
| 41 |
-
logger.info(headers)
|
| 42 |
return session_info.request.remote_ip, headers.get("X-Forwarded-For")
|
| 43 |
|
| 44 |
# 接続元IP許可判定
|
| 45 |
def is_allow_ip_address():
|
| 46 |
remote_ip, x_forwarded_for = get_remote_ip()
|
| 47 |
-
logger.info("remote_ip")
|
| 48 |
-
logger.info(remote_ip)
|
| 49 |
if x_forwarded_for is not None:
|
| 50 |
remote_ip = x_forwarded_for
|
| 51 |
# localhost
|
|
@@ -54,8 +52,7 @@ def is_allow_ip_address():
|
|
| 54 |
|
| 55 |
# プライベートIP
|
| 56 |
ipaddr = ipaddress.IPv4Address(remote_ip)
|
| 57 |
-
logger.info("ipaddr")
|
| 58 |
-
logger.info(ipaddr)
|
| 59 |
if ipaddr.is_private:
|
| 60 |
return True
|
| 61 |
|
|
@@ -70,3 +67,128 @@ def check_login():
|
|
| 70 |
if "login_token" not in st.session_state or not st.session_state.login_token:
|
| 71 |
st.warning("**ログインしてください**")
|
| 72 |
st.stop()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
|
|
|
| 2 |
import os
|
| 3 |
+
import pickle
|
| 4 |
+
import ipaddress
|
| 5 |
+
import tiktoken
|
| 6 |
|
| 7 |
+
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
from streamlit import runtime
|
| 9 |
from streamlit.runtime.scriptrunner import get_script_run_ctx
|
|
|
|
| 10 |
from streamlit.web.server.websocket_headers import _get_websocket_headers
|
| 11 |
+
from llama_index import SimpleDirectoryReader
|
| 12 |
+
from llama_index import Prompt
|
| 13 |
+
from llama_index.chat_engine import CondenseQuestionChatEngine;
|
| 14 |
+
from llama_index.response_synthesizers import get_response_synthesizer
|
| 15 |
+
from llama_index import ServiceContext, SimpleDirectoryReader
|
| 16 |
+
from llama_index.node_parser import SimpleNodeParser
|
| 17 |
+
from llama_index.langchain_helpers.text_splitter import TokenTextSplitter
|
| 18 |
+
from llama_index.constants import DEFAULT_CHUNK_OVERLAP
|
| 19 |
+
from llama_index.response_synthesizers import get_response_synthesizer
|
| 20 |
+
from llama_index.callbacks import CallbackManager
|
| 21 |
+
from log import logger
|
| 22 |
|
| 23 |
# 接続元制御
|
| 24 |
ALLOW_IP_ADDRESS = os.environ["ALLOW_IP_ADDRESS"]
|
|
|
|
| 36 |
# 接続元IP取得
|
| 37 |
def get_remote_ip():
|
| 38 |
ctx = get_script_run_ctx()
|
|
|
|
|
|
|
| 39 |
session_info = runtime.get_instance().get_client(ctx.session_id)
|
|
|
|
|
|
|
| 40 |
headers = _get_websocket_headers()
|
|
|
|
|
|
|
| 41 |
return session_info.request.remote_ip, headers.get("X-Forwarded-For")
|
| 42 |
|
| 43 |
# 接続元IP許可判定
|
| 44 |
def is_allow_ip_address():
|
| 45 |
remote_ip, x_forwarded_for = get_remote_ip()
|
| 46 |
+
logger.info("remote_ip:"+remote_ip)
|
|
|
|
| 47 |
if x_forwarded_for is not None:
|
| 48 |
remote_ip = x_forwarded_for
|
| 49 |
# localhost
|
|
|
|
| 52 |
|
| 53 |
# プライベートIP
|
| 54 |
ipaddr = ipaddress.IPv4Address(remote_ip)
|
| 55 |
+
logger.info("ipaddr:"+str(ipaddr))
|
|
|
|
| 56 |
if ipaddr.is_private:
|
| 57 |
return True
|
| 58 |
|
|
|
|
| 67 |
if "login_token" not in st.session_state or not st.session_state.login_token:
|
| 68 |
st.warning("**ログインしてください**")
|
| 69 |
st.stop()
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
index_name = os.environ["INDEX_NAME"]
|
| 73 |
+
pkl_name = os.environ["PKL_NAME"]
|
| 74 |
+
# デバッグ用
|
| 75 |
+
text_splitter = TokenTextSplitter( chunk_size=1500
|
| 76 |
+
, chunk_overlap=DEFAULT_CHUNK_OVERLAP
|
| 77 |
+
, tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode)
|
| 78 |
+
node_parser = SimpleNodeParser(text_splitter=text_splitter)
|
| 79 |
+
custom_prompt = Prompt("""\
|
| 80 |
+
以下はこれまでの会話履歴と、ドキュメントを検索して回答する必要がある、ユーザーからの会話文です。
|
| 81 |
+
会話と新しい会話文に基づいて、検索クエリを作成します。
|
| 82 |
+
挨拶された場合、挨拶を返してください。
|
| 83 |
+
質問された場合、検索した結果の回答を返してください。
|
| 84 |
+
答えを知らない場合は、「わかりません」と回答してください。
|
| 85 |
+
全ての回答は日本語で行ってください。
|
| 86 |
+
会話履歴:
|
| 87 |
+
{chat_history}
|
| 88 |
+
新しい会話文:
|
| 89 |
+
{question}
|
| 90 |
+
Search query:
|
| 91 |
+
""")
|
| 92 |
+
|
| 93 |
+
chat_history = []
|
| 94 |
+
def fileImportChatEngine(uploaded_file):
|
| 95 |
+
filepath = None
|
| 96 |
+
try:
|
| 97 |
+
filepath = os.path.join('documents', os.path.basename( uploaded_file.name))
|
| 98 |
+
logger.info(filepath)
|
| 99 |
+
with open(filepath, 'wb') as f:
|
| 100 |
+
f.write(uploaded_file.getvalue())
|
| 101 |
+
f.close()
|
| 102 |
+
document = SimpleDirectoryReader(input_files=[filepath]).load_data()[0]
|
| 103 |
+
st.session_state.stored_docs.append(uploaded_file.name)
|
| 104 |
+
logger.info(st.session_state.stored_docs)
|
| 105 |
+
st.session_state.index.insert(document=document)
|
| 106 |
+
st.session_state.index.storage_context.persist(persist_dir=index_name)
|
| 107 |
+
setChatEngine()
|
| 108 |
+
with open(pkl_name, "wb") as f:
|
| 109 |
+
print("pickle")
|
| 110 |
+
pickle.dump(st.session_state.stored_docs, f)
|
| 111 |
+
st.session_state["file_uploader_key"] += 1
|
| 112 |
+
st.experimental_rerun()
|
| 113 |
+
except Exception as e:
|
| 114 |
+
# cleanup temp file
|
| 115 |
+
logger.error(e)
|
| 116 |
+
if filepath is not None and os.path.exists(filepath):
|
| 117 |
+
os.remove(filepath)
|
| 118 |
+
|
| 119 |
+
def fileImportChatEngineCustomloader(uploaded_file,loader):
|
| 120 |
+
filepath = None
|
| 121 |
+
try:
|
| 122 |
+
filepath = os.path.join('documents', os.path.basename( uploaded_file.name))
|
| 123 |
+
logger.info(filepath)
|
| 124 |
+
with open(filepath, 'wb') as f:
|
| 125 |
+
f.write(uploaded_file.getvalue())
|
| 126 |
+
f.close()
|
| 127 |
+
document = loader.load_data(file=Path(filepath))[0]
|
| 128 |
+
st.session_state.stored_docs.append(uploaded_file.name)
|
| 129 |
+
logger.info(st.session_state.stored_docs)
|
| 130 |
+
st.session_state.index.insert(document=document)
|
| 131 |
+
st.session_state.index.storage_context.persist(persist_dir=index_name)
|
| 132 |
+
setChatEngine()
|
| 133 |
+
with open(pkl_name, "wb") as f:
|
| 134 |
+
print("pickle")
|
| 135 |
+
pickle.dump(st.session_state.stored_docs, f)
|
| 136 |
+
st.session_state["file_uploader_key"] += 1
|
| 137 |
+
st.experimental_rerun()
|
| 138 |
+
except Exception as e:
|
| 139 |
+
# cleanup temp file
|
| 140 |
+
logger.error(e)
|
| 141 |
+
if filepath is not None and os.path.exists(filepath):
|
| 142 |
+
os.remove(filepath)
|
| 143 |
+
|
| 144 |
+
def setChatEngine():
|
| 145 |
+
callback_manager = CallbackManager([st.session_state.llama_debug_handler])
|
| 146 |
+
service_context = ServiceContext.from_defaults(node_parser=node_parser,callback_manager=callback_manager)
|
| 147 |
+
response_synthesizer = get_response_synthesizer(response_mode='refine')
|
| 148 |
+
st.session_state.query_engine = st.session_state.index.as_query_engine(
|
| 149 |
+
response_synthesizer=response_synthesizer,
|
| 150 |
+
service_context=service_context,
|
| 151 |
+
)
|
| 152 |
+
st.session_state.chat_engine = CondenseQuestionChatEngine.from_defaults(
|
| 153 |
+
query_engine=st.session_state.query_engine,
|
| 154 |
+
condense_question_prompt=custom_prompt,
|
| 155 |
+
chat_history=chat_history,
|
| 156 |
+
verbose=True
|
| 157 |
+
)
|
| 158 |
+
|
| 159 |
+
# chat mode reacの記述
|
| 160 |
+
# from langchain.prompts.chat import (
|
| 161 |
+
# ChatPromptTemplate,
|
| 162 |
+
# HumanMessagePromptTemplate,
|
| 163 |
+
# SystemMessagePromptTemplate,
|
| 164 |
+
# )
|
| 165 |
+
# from llama_index.prompts import Prompt
|
| 166 |
+
# chat_text_qa_msgs = [
|
| 167 |
+
# SystemMessagePromptTemplate.from_template(
|
| 168 |
+
# "文脈が役に立たない場合でも、必ず質問に答えてください。"
|
| 169 |
+
# ),
|
| 170 |
+
# HumanMessagePromptTemplate.from_template(
|
| 171 |
+
# "以下に、コンテキスト情報を提供します。 \n"
|
| 172 |
+
# "---------------------\n"
|
| 173 |
+
# "{context_str}"
|
| 174 |
+
# "\n---------------------\n"
|
| 175 |
+
# "回答には以下を含めてください。\n"
|
| 176 |
+
# "・最初に問い合わせへのお礼してください\n"
|
| 177 |
+
# "・自己紹介してください\n"
|
| 178 |
+
# "・質問内容を要約してください\n"
|
| 179 |
+
# "・最後に不明な点がないか確認してください \n"
|
| 180 |
+
# "この情報を踏まえて、次の質問に回答してください: {query_str}\n"
|
| 181 |
+
# "答えを知らない場合は、「わからない」と回答してください。また、日本語で回答してください。"
|
| 182 |
+
# ),
|
| 183 |
+
# ]
|
| 184 |
+
# def setChatEngine():
|
| 185 |
+
# callback_manager = CallbackManager([st.session_state.llama_debug_handler])
|
| 186 |
+
# service_context = ServiceContext.from_defaults(node_parser=node_parser,callback_manager=callback_manager)
|
| 187 |
+
# response_synthesizer = get_response_synthesizer(response_mode='refine')
|
| 188 |
+
# st.session_state.chat_engine = st.session_state.index.as_chat_engine(
|
| 189 |
+
# response_synthesizer=response_synthesizer,
|
| 190 |
+
# service_context=service_context,
|
| 191 |
+
# chat_mode="react",
|
| 192 |
+
# text_qa_template= Prompt.from_langchain_prompt(ChatPromptTemplate.from_messages(chat_text_qa_msgs)),
|
| 193 |
+
# verbose=True
|
| 194 |
+
# )
|
log.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
|
| 3 |
+
logging.basicConfig(level=logging.INFO)
|
| 4 |
+
logger = logging.getLogger("__name__")
|
| 5 |
+
|
pages/Chatbot.py
CHANGED
|
@@ -1,33 +1,11 @@
|
|
| 1 |
|
| 2 |
import streamlit as st
|
| 3 |
-
import logging
|
| 4 |
-
|
| 5 |
-
from llama_index import Prompt
|
| 6 |
-
|
| 7 |
import common
|
|
|
|
| 8 |
|
| 9 |
-
index_name =
|
| 10 |
-
pkl_name =
|
| 11 |
-
|
| 12 |
-
custom_prompt = Prompt("""\
|
| 13 |
-
以下はこれまでの会話履歴と、ドキュメントを検索して回答する必要がある、ユーザーからの会話文です。
|
| 14 |
-
会話と新しい会話文に基づいて、検索クエリを作成します。回答は日本語で行います。
|
| 15 |
-
新しい会話文が挨拶の場合、挨拶を返してください。
|
| 16 |
-
新しい会話文が質問の場合、検索した結果の回答を返してください。
|
| 17 |
-
答えがわからない場合は正直にわからないと回答してください。
|
| 18 |
-
会話履歴:
|
| 19 |
-
{chat_history}
|
| 20 |
-
新しい会話文:
|
| 21 |
-
{question}
|
| 22 |
-
Search query:
|
| 23 |
-
""")
|
| 24 |
-
|
| 25 |
-
chat_history = []
|
| 26 |
-
|
| 27 |
-
logging.basicConfig(level=logging.INFO)
|
| 28 |
-
logger = logging.getLogger("__name__")
|
| 29 |
-
logger.debug("調査用ログ")
|
| 30 |
-
|
| 31 |
common.check_login()
|
| 32 |
|
| 33 |
st.title("💬 Chatbot")
|
|
@@ -47,9 +25,7 @@ if prompt := st.chat_input():
|
|
| 47 |
st.session_state.messages.append({"role": "user", "content": prompt})
|
| 48 |
st.chat_message("user").write(prompt)
|
| 49 |
response = st.session_state.chat_engine.chat(prompt)
|
|
|
|
| 50 |
msg = str(response)
|
| 51 |
st.session_state.messages.append({"role": "assistant", "content": msg})
|
| 52 |
st.chat_message("assistant").write(msg)
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
|
|
|
| 1 |
|
| 2 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
import common
|
| 4 |
+
import os
|
| 5 |
|
| 6 |
+
index_name = os.environ["INDEX_NAME"]
|
| 7 |
+
pkl_name = os.environ["PKL_NAME"]
|
| 8 |
+
from log import logger
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
common.check_login()
|
| 10 |
|
| 11 |
st.title("💬 Chatbot")
|
|
|
|
| 25 |
st.session_state.messages.append({"role": "user", "content": prompt})
|
| 26 |
st.chat_message("user").write(prompt)
|
| 27 |
response = st.session_state.chat_engine.chat(prompt)
|
| 28 |
+
# logger.info(st.session_state.llama_debug_handler.get_llm_inputs_outputs()[-1][-1])
|
| 29 |
msg = str(response)
|
| 30 |
st.session_state.messages.append({"role": "assistant", "content": msg})
|
| 31 |
st.chat_message("assistant").write(msg)
|
|
|
|
|
|
|
|
|
pages/ChatbotWebRead.py
CHANGED
|
@@ -1,19 +1,21 @@
|
|
| 1 |
|
| 2 |
import streamlit as st
|
| 3 |
import faiss
|
| 4 |
-
import
|
| 5 |
-
|
| 6 |
-
from llama_index
|
| 7 |
-
from llama_index import
|
| 8 |
-
from llama_index.chat_engine import CondenseQuestionChatEngine;
|
| 9 |
from llama_index.node_parser import SimpleNodeParser
|
| 10 |
from llama_index.langchain_helpers.text_splitter import TokenTextSplitter
|
| 11 |
from llama_index.constants import DEFAULT_CHUNK_OVERLAP
|
| 12 |
from llama_index.response_synthesizers import get_response_synthesizer
|
| 13 |
-
from llama_index import
|
| 14 |
|
|
|
|
|
|
|
| 15 |
import tiktoken
|
| 16 |
import common
|
|
|
|
| 17 |
|
| 18 |
custom_prompt = Prompt("""\
|
| 19 |
以下はこれまでの会話履歴と、ドキュメントを検索して回答する必要がある、ユーザーからの会話文です。
|
|
@@ -27,13 +29,9 @@ custom_prompt = Prompt("""\
|
|
| 27 |
{question}
|
| 28 |
Search query:
|
| 29 |
""")
|
| 30 |
-
|
| 31 |
chat_history = []
|
| 32 |
|
| 33 |
-
|
| 34 |
-
logger = logging.getLogger("__name__")
|
| 35 |
-
logger.debug("調査用ログ")
|
| 36 |
-
|
| 37 |
|
| 38 |
common.check_login()
|
| 39 |
|
|
@@ -45,27 +43,29 @@ URLtext = st.text_input(
|
|
| 45 |
)
|
| 46 |
|
| 47 |
if st.button("URL reading",use_container_width=True):
|
| 48 |
-
text_splitter = TokenTextSplitter(
|
| 49 |
, chunk_overlap=DEFAULT_CHUNK_OVERLAP
|
| 50 |
, tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode)
|
| 51 |
node_parser = SimpleNodeParser(text_splitter=text_splitter)
|
| 52 |
d = 1536
|
| 53 |
k=2
|
| 54 |
faiss_index = faiss.IndexFlatL2(d)
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
callback_manager = CallbackManager([llama_debug_handler])
|
| 58 |
service_context = ServiceContext.from_defaults(node_parser=node_parser,callback_manager=callback_manager)
|
| 59 |
|
| 60 |
webDocuments = SimpleWebPageReader(html_to_text=True).load_data(
|
| 61 |
[URLtext]
|
| 62 |
)
|
| 63 |
logger.info(webDocuments)
|
| 64 |
-
webIndex =
|
| 65 |
-
response_synthesizer = get_response_synthesizer(response_mode='
|
| 66 |
-
webQuery_engine = webIndex.as_query_engine(
|
|
|
|
|
|
|
|
|
|
| 67 |
st.session_state.web_chat_engine = CondenseQuestionChatEngine.from_defaults(
|
| 68 |
-
query_engine=webQuery_engine,
|
| 69 |
condense_question_prompt=custom_prompt,
|
| 70 |
chat_history=chat_history,
|
| 71 |
verbose=True
|
|
@@ -87,8 +87,7 @@ if prompt := st.chat_input(disabled = not URLtext):
|
|
| 87 |
st.session_state.webmessages.append({"role": "user", "content": prompt})
|
| 88 |
st.chat_message("user").write(prompt)
|
| 89 |
response = st.session_state.web_chat_engine.chat(prompt)
|
|
|
|
| 90 |
msg = str(response)
|
| 91 |
st.session_state.webmessages.append({"role": "assistant", "content": msg})
|
| 92 |
st.chat_message("assistant").write(msg)
|
| 93 |
-
|
| 94 |
-
|
|
|
|
| 1 |
|
| 2 |
import streamlit as st
|
| 3 |
import faiss
|
| 4 |
+
import langchain
|
| 5 |
+
from llama_index.callbacks import CallbackManager
|
| 6 |
+
from llama_index import ServiceContext,VectorStoreIndex
|
| 7 |
+
from llama_index.chat_engine import CondenseQuestionChatEngine
|
|
|
|
| 8 |
from llama_index.node_parser import SimpleNodeParser
|
| 9 |
from llama_index.langchain_helpers.text_splitter import TokenTextSplitter
|
| 10 |
from llama_index.constants import DEFAULT_CHUNK_OVERLAP
|
| 11 |
from llama_index.response_synthesizers import get_response_synthesizer
|
| 12 |
+
from llama_index import SimpleWebPageReader
|
| 13 |
|
| 14 |
+
# from llama_index.prompts import Prompt
|
| 15 |
+
from llama_index import Prompt
|
| 16 |
import tiktoken
|
| 17 |
import common
|
| 18 |
+
langchain.verbose = True
|
| 19 |
|
| 20 |
custom_prompt = Prompt("""\
|
| 21 |
以下はこれまでの会話履歴と、ドキュメントを検索して回答する必要がある、ユーザーからの会話文です。
|
|
|
|
| 29 |
{question}
|
| 30 |
Search query:
|
| 31 |
""")
|
|
|
|
| 32 |
chat_history = []
|
| 33 |
|
| 34 |
+
from log import logger
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
common.check_login()
|
| 37 |
|
|
|
|
| 43 |
)
|
| 44 |
|
| 45 |
if st.button("URL reading",use_container_width=True):
|
| 46 |
+
text_splitter = TokenTextSplitter( chunk_size=1500
|
| 47 |
, chunk_overlap=DEFAULT_CHUNK_OVERLAP
|
| 48 |
, tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode)
|
| 49 |
node_parser = SimpleNodeParser(text_splitter=text_splitter)
|
| 50 |
d = 1536
|
| 51 |
k=2
|
| 52 |
faiss_index = faiss.IndexFlatL2(d)
|
| 53 |
+
|
| 54 |
+
callback_manager = CallbackManager([st.session_state.llama_debug_handler])
|
|
|
|
| 55 |
service_context = ServiceContext.from_defaults(node_parser=node_parser,callback_manager=callback_manager)
|
| 56 |
|
| 57 |
webDocuments = SimpleWebPageReader(html_to_text=True).load_data(
|
| 58 |
[URLtext]
|
| 59 |
)
|
| 60 |
logger.info(webDocuments)
|
| 61 |
+
webIndex = VectorStoreIndex.from_documents(webDocuments,service_context=service_context)
|
| 62 |
+
response_synthesizer = get_response_synthesizer(response_mode='refine')
|
| 63 |
+
st.session_state.webQuery_engine = webIndex.as_query_engine(
|
| 64 |
+
response_synthesizer=response_synthesizer,
|
| 65 |
+
service_context=service_context,
|
| 66 |
+
)
|
| 67 |
st.session_state.web_chat_engine = CondenseQuestionChatEngine.from_defaults(
|
| 68 |
+
query_engine=st.session_state.webQuery_engine,
|
| 69 |
condense_question_prompt=custom_prompt,
|
| 70 |
chat_history=chat_history,
|
| 71 |
verbose=True
|
|
|
|
| 87 |
st.session_state.webmessages.append({"role": "user", "content": prompt})
|
| 88 |
st.chat_message("user").write(prompt)
|
| 89 |
response = st.session_state.web_chat_engine.chat(prompt)
|
| 90 |
+
logger.debug(st.session_state.llama_debug_handler.get_llm_inputs_outputs())
|
| 91 |
msg = str(response)
|
| 92 |
st.session_state.webmessages.append({"role": "assistant", "content": msg})
|
| 93 |
st.chat_message("assistant").write(msg)
|
|
|
|
|
|
pages/ImportExcelFile.py
CHANGED
|
@@ -1,80 +1,21 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
-
import anthropic
|
| 3 |
-
from pathlib import Path
|
| 4 |
-
from llama_index import download_loader,Prompt
|
| 5 |
-
import os
|
| 6 |
-
import pickle
|
| 7 |
-
import logging
|
| 8 |
import common
|
| 9 |
|
| 10 |
-
from
|
| 11 |
-
from
|
| 12 |
-
|
| 13 |
-
index_name = "./data/storage"
|
| 14 |
-
pkl_name = "./data/stored_documents.pkl"
|
| 15 |
-
|
| 16 |
-
custom_prompt = Prompt("""\
|
| 17 |
-
以下はこれまでの会話履歴と、ドキュメントを検索して回答する必要がある、ユーザーからの会話文です。
|
| 18 |
-
会話と新しい会話文に基づいて、検索クエリを作成します。回答は日本語で行います。
|
| 19 |
-
新しい会話文が挨拶の場合、挨拶を返してください。
|
| 20 |
-
新しい会話文が質問の場合、検索した結果の回答を返してください。
|
| 21 |
-
答えがわからない場合は正直にわからないと回答してください。
|
| 22 |
-
会話履歴:
|
| 23 |
-
{chat_history}
|
| 24 |
-
新しい会話文:
|
| 25 |
-
{question}
|
| 26 |
-
Search query:
|
| 27 |
-
""")
|
| 28 |
-
|
| 29 |
-
chat_history = []
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
logging.basicConfig(level=logging.INFO)
|
| 33 |
-
logger = logging.getLogger("__name__")
|
| 34 |
-
logger.debug("調査用ログ")
|
| 35 |
|
| 36 |
common.check_login()
|
| 37 |
|
| 38 |
-
PandasExcelReader = download_loader("PandasExcelReader")
|
| 39 |
loader = PandasExcelReader(pandas_config={"header": 0})
|
| 40 |
|
| 41 |
if "file_uploader_key" not in st.session_state:
|
| 42 |
st.session_state["file_uploader_key"] = 0
|
| 43 |
|
| 44 |
-
st.title("📝
|
| 45 |
uploaded_file = st.file_uploader("Upload an article", type=("xlsx"))
|
| 46 |
|
| 47 |
if st.button("import",use_container_width=True):
|
| 48 |
-
|
| 49 |
-
try:
|
| 50 |
-
filepath = os.path.join('documents', os.path.basename( uploaded_file.name))
|
| 51 |
-
logger.info(filepath)
|
| 52 |
-
with open(filepath, 'wb') as f:
|
| 53 |
-
f.write(uploaded_file.getvalue())
|
| 54 |
-
f.close()
|
| 55 |
-
document = loader.load_data(file=filepath)[0]
|
| 56 |
-
st.session_state.stored_docs.append(uploaded_file.name)
|
| 57 |
-
logger.info(st.session_state.stored_docs)
|
| 58 |
-
st.session_state.index.insert(document=document)
|
| 59 |
-
st.session_state.index.storage_context.persist(persist_dir=index_name)
|
| 60 |
-
response_synthesizer = get_response_synthesizer(response_mode='refine')
|
| 61 |
-
st.session_state.query_engine = st.session_state.index.as_query_engine(response_synthesizer=response_synthesizer)
|
| 62 |
-
st.session_state.chat_engine = CondenseQuestionChatEngine.from_defaults(
|
| 63 |
-
query_engine=st.session_state.query_engine,
|
| 64 |
-
condense_question_prompt=custom_prompt,
|
| 65 |
-
chat_history=chat_history,
|
| 66 |
-
verbose=True
|
| 67 |
-
)
|
| 68 |
-
with open(pkl_name, "wb") as f:
|
| 69 |
-
print("pickle")
|
| 70 |
-
pickle.dump(st.session_state.stored_docs, f)
|
| 71 |
-
st.session_state["file_uploader_key"] += 1
|
| 72 |
-
st.experimental_rerun()
|
| 73 |
-
except Exception as e:
|
| 74 |
-
# cleanup temp file
|
| 75 |
-
logger.error(e)
|
| 76 |
-
if filepath is not None and os.path.exists(filepath):
|
| 77 |
-
os.remove(filepath)
|
| 78 |
|
| 79 |
st.subheader("Import File List")
|
| 80 |
if "stored_docs" in st.session_state:
|
|
|
|
| 1 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import common
|
| 3 |
|
| 4 |
+
from llama_hub.file.pandas_excel.base import PandasExcelReader
|
| 5 |
+
from log import logger
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
common.check_login()
|
| 8 |
|
|
|
|
| 9 |
loader = PandasExcelReader(pandas_config={"header": 0})
|
| 10 |
|
| 11 |
if "file_uploader_key" not in st.session_state:
|
| 12 |
st.session_state["file_uploader_key"] = 0
|
| 13 |
|
| 14 |
+
st.title("📝 ImportExcelFile")
|
| 15 |
uploaded_file = st.file_uploader("Upload an article", type=("xlsx"))
|
| 16 |
|
| 17 |
if st.button("import",use_container_width=True):
|
| 18 |
+
common.fileImportChatEngineCustomloader(uploaded_file,loader)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
st.subheader("Import File List")
|
| 21 |
if "stored_docs" in st.session_state:
|
pages/ImportFile.py
CHANGED
|
@@ -1,39 +1,7 @@
|
|
| 1 |
-
import openai
|
| 2 |
import streamlit as st
|
| 3 |
-
import os
|
| 4 |
-
import pickle
|
| 5 |
-
import logging
|
| 6 |
-
|
| 7 |
-
from llama_index import SimpleDirectoryReader
|
| 8 |
-
from llama_index.chat_engine import CondenseQuestionChatEngine;
|
| 9 |
-
from llama_index.response_synthesizers import get_response_synthesizer
|
| 10 |
-
from llama_index import Prompt, SimpleDirectoryReader
|
| 11 |
-
|
| 12 |
-
from logging import getLogger, StreamHandler, Formatter
|
| 13 |
-
|
| 14 |
import common
|
| 15 |
|
| 16 |
-
|
| 17 |
-
pkl_name = "./data/stored_documents.pkl"
|
| 18 |
-
|
| 19 |
-
custom_prompt = Prompt("""\
|
| 20 |
-
以下はこれまでの会話履歴と、ドキュメントを検索して回答する必要がある、ユーザーからの会話文です。
|
| 21 |
-
会話と新しい会話文に基づいて、検索クエリを作成します。回答は日本語で行います。
|
| 22 |
-
新しい会話文が挨拶の場合、挨拶を返してください。
|
| 23 |
-
新しい会話文が質問の場合、検索した結果の回答を返してください。
|
| 24 |
-
答えがわからない場合は正直にわからないと回答してください。
|
| 25 |
-
会話履歴:
|
| 26 |
-
{chat_history}
|
| 27 |
-
新しい会話文:
|
| 28 |
-
{question}
|
| 29 |
-
Search query:
|
| 30 |
-
""")
|
| 31 |
-
|
| 32 |
-
chat_history = []
|
| 33 |
-
|
| 34 |
-
logging.basicConfig(level=logging.INFO)
|
| 35 |
-
logger = logging.getLogger("__name__")
|
| 36 |
-
logger.debug("調査用ログ")
|
| 37 |
|
| 38 |
common.check_login()
|
| 39 |
|
|
@@ -42,39 +10,9 @@ if "file_uploader_key" not in st.session_state:
|
|
| 42 |
|
| 43 |
st.title("📝 ImportFile")
|
| 44 |
|
| 45 |
-
uploaded_file = st.file_uploader("Upload an article", type=("txt", "md"
|
| 46 |
if st.button("import",use_container_width=True):
|
| 47 |
-
|
| 48 |
-
try:
|
| 49 |
-
filepath = os.path.join('documents', os.path.basename( uploaded_file.name))
|
| 50 |
-
logger.info(filepath)
|
| 51 |
-
with open(filepath, 'wb') as f:
|
| 52 |
-
f.write(uploaded_file.getvalue())
|
| 53 |
-
f.close()
|
| 54 |
-
document = SimpleDirectoryReader(input_files=[filepath]).load_data()[0]
|
| 55 |
-
logger.info(document)
|
| 56 |
-
st.session_state.stored_docs.append(uploaded_file.name)
|
| 57 |
-
logger.info(st.session_state.stored_docs)
|
| 58 |
-
st.session_state.index.insert(document=document)
|
| 59 |
-
st.session_state.index.storage_context.persist(persist_dir=index_name)
|
| 60 |
-
response_synthesizer = get_response_synthesizer(response_mode='refine')
|
| 61 |
-
st.session_state.query_engine = st.session_state.index.as_query_engine(response_synthesizer=response_synthesizer)
|
| 62 |
-
st.session_state.chat_engine = CondenseQuestionChatEngine.from_defaults(
|
| 63 |
-
query_engine=st.session_state.query_engine,
|
| 64 |
-
condense_question_prompt=custom_prompt,
|
| 65 |
-
chat_history=chat_history,
|
| 66 |
-
verbose=True
|
| 67 |
-
)
|
| 68 |
-
with open(pkl_name, "wb") as f:
|
| 69 |
-
print("pickle")
|
| 70 |
-
pickle.dump(st.session_state.stored_docs, f)
|
| 71 |
-
st.session_state["file_uploader_key"] += 1
|
| 72 |
-
st.experimental_rerun()
|
| 73 |
-
except Exception as e:
|
| 74 |
-
# cleanup temp file
|
| 75 |
-
logger.error(e)
|
| 76 |
-
if filepath is not None and os.path.exists(filepath):
|
| 77 |
-
os.remove(filepath)
|
| 78 |
|
| 79 |
st.subheader("Import File List")
|
| 80 |
if "stored_docs" in st.session_state:
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import common
|
| 3 |
|
| 4 |
+
from log import logger
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
common.check_login()
|
| 7 |
|
|
|
|
| 10 |
|
| 11 |
st.title("📝 ImportFile")
|
| 12 |
|
| 13 |
+
uploaded_file = st.file_uploader("Upload an article", type=("txt", "md"),key=st.session_state["file_uploader_key"])
|
| 14 |
if st.button("import",use_container_width=True):
|
| 15 |
+
common.fileImportChatEngine(uploaded_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
st.subheader("Import File List")
|
| 18 |
if "stored_docs" in st.session_state:
|
pages/ImportPdfFile.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import common
|
| 3 |
+
|
| 4 |
+
from llama_hub.file.cjk_pdf.base import CJKPDFReader
|
| 5 |
+
from log import logger
|
| 6 |
+
|
| 7 |
+
common.check_login()
|
| 8 |
+
|
| 9 |
+
loader = CJKPDFReader()
|
| 10 |
+
|
| 11 |
+
if "file_uploader_key" not in st.session_state:
|
| 12 |
+
st.session_state["file_uploader_key"] = 0
|
| 13 |
+
|
| 14 |
+
st.title("📝 ImportPdfFile")
|
| 15 |
+
uploaded_file = st.file_uploader("Upload an article", type=("pdf"))
|
| 16 |
+
|
| 17 |
+
if st.button("import",use_container_width=True):
|
| 18 |
+
common.fileImportChatEngineCustomloader(uploaded_file,loader)
|
| 19 |
+
|
| 20 |
+
st.subheader("Import File List")
|
| 21 |
+
if "stored_docs" in st.session_state:
|
| 22 |
+
logger.info(st.session_state.stored_docs)
|
| 23 |
+
for docname in st.session_state.stored_docs:
|
| 24 |
+
st.write(docname)
|
pages/ImportPptxFile.py
CHANGED
|
@@ -1,41 +1,11 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
-
import anthropic
|
| 3 |
-
from pathlib import Path
|
| 4 |
-
from llama_index import download_loader,Prompt
|
| 5 |
-
import os
|
| 6 |
-
import pickle
|
| 7 |
-
import logging
|
| 8 |
import common
|
| 9 |
|
| 10 |
-
from
|
| 11 |
-
from
|
| 12 |
-
|
| 13 |
-
index_name = "./data/storage"
|
| 14 |
-
pkl_name = "./data/stored_documents.pkl"
|
| 15 |
-
|
| 16 |
-
custom_prompt = Prompt("""\
|
| 17 |
-
以下はこれまでの会話履歴と、ドキュメントを検索して回答する必要がある、ユーザーからの会話文です。
|
| 18 |
-
会話と新しい会話文に基づいて、検索クエリを作成します。回答は日本語で行います。
|
| 19 |
-
新しい会話文が挨拶の場合、挨拶を返してください。
|
| 20 |
-
新しい会話文が質問の場合、検索した結果の回答を返してください。
|
| 21 |
-
答えがわからない場合は正直にわからないと回答してください。
|
| 22 |
-
会話履歴:
|
| 23 |
-
{chat_history}
|
| 24 |
-
新しい会話文:
|
| 25 |
-
{question}
|
| 26 |
-
Search query:
|
| 27 |
-
""")
|
| 28 |
-
|
| 29 |
-
chat_history = []
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
logging.basicConfig(level=logging.INFO)
|
| 33 |
-
logger = logging.getLogger("__name__")
|
| 34 |
-
logger.debug("調査用ログ")
|
| 35 |
|
| 36 |
common.check_login()
|
| 37 |
|
| 38 |
-
PptxReader = download_loader("PptxReader")
|
| 39 |
loader = PptxReader()
|
| 40 |
|
| 41 |
if "file_uploader_key" not in st.session_state:
|
|
@@ -45,36 +15,7 @@ st.title("📝 ImportPptxFile")
|
|
| 45 |
uploaded_file = st.file_uploader("Upload an article", type=("pptx"))
|
| 46 |
|
| 47 |
if st.button("import",use_container_width=True):
|
| 48 |
-
|
| 49 |
-
try:
|
| 50 |
-
filepath = os.path.join('documents', os.path.basename( uploaded_file.name))
|
| 51 |
-
logger.info(filepath)
|
| 52 |
-
with open(filepath, 'wb') as f:
|
| 53 |
-
f.write(uploaded_file.getvalue())
|
| 54 |
-
f.close()
|
| 55 |
-
document = loader.load_data(file=filepath)[0]
|
| 56 |
-
st.session_state.stored_docs.append(uploaded_file.name)
|
| 57 |
-
logger.info(st.session_state.stored_docs)
|
| 58 |
-
st.session_state.index.insert(document=document)
|
| 59 |
-
st.session_state.index.storage_context.persist(persist_dir=index_name)
|
| 60 |
-
response_synthesizer = get_response_synthesizer(response_mode='refine')
|
| 61 |
-
st.session_state.query_engine = st.session_state.index.as_query_engine(response_synthesizer=response_synthesizer)
|
| 62 |
-
st.session_state.chat_engine = CondenseQuestionChatEngine.from_defaults(
|
| 63 |
-
query_engine=st.session_state.query_engine,
|
| 64 |
-
condense_question_prompt=custom_prompt,
|
| 65 |
-
chat_history=chat_history,
|
| 66 |
-
verbose=True
|
| 67 |
-
)
|
| 68 |
-
with open(pkl_name, "wb") as f:
|
| 69 |
-
print("pickle")
|
| 70 |
-
pickle.dump(st.session_state.stored_docs, f)
|
| 71 |
-
st.session_state["file_uploader_key"] += 1
|
| 72 |
-
st.experimental_rerun()
|
| 73 |
-
except Exception as e:
|
| 74 |
-
# cleanup temp file
|
| 75 |
-
logger.error(e)
|
| 76 |
-
if filepath is not None and os.path.exists(filepath):
|
| 77 |
-
os.remove(filepath)
|
| 78 |
|
| 79 |
st.subheader("Import File List")
|
| 80 |
if "stored_docs" in st.session_state:
|
|
|
|
| 1 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import common
|
| 3 |
|
| 4 |
+
from llama_hub.file.pptx.base import PptxReader
|
| 5 |
+
from log import logger
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
common.check_login()
|
| 8 |
|
|
|
|
| 9 |
loader = PptxReader()
|
| 10 |
|
| 11 |
if "file_uploader_key" not in st.session_state:
|
|
|
|
| 15 |
uploaded_file = st.file_uploader("Upload an article", type=("pptx"))
|
| 16 |
|
| 17 |
if st.button("import",use_container_width=True):
|
| 18 |
+
common.fileImportChatEngineCustomloader(uploaded_file,loader)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
st.subheader("Import File List")
|
| 21 |
if "stored_docs" in st.session_state:
|
pages/ImportWordFile.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import common
|
| 3 |
+
|
| 4 |
+
from llama_hub.file.docx.base import DocxReader
|
| 5 |
+
from log import logger
|
| 6 |
+
|
| 7 |
+
common.check_login()
|
| 8 |
+
|
| 9 |
+
loader = DocxReader()
|
| 10 |
+
|
| 11 |
+
if "file_uploader_key" not in st.session_state:
|
| 12 |
+
st.session_state["file_uploader_key"] = 0
|
| 13 |
+
|
| 14 |
+
st.title("📝 ImportWordFile")
|
| 15 |
+
uploaded_file = st.file_uploader("Upload an article", type=("docx"))
|
| 16 |
+
|
| 17 |
+
if st.button("import",use_container_width=True):
|
| 18 |
+
common.fileImportChatEngineCustomloader(uploaded_file,loader)
|
| 19 |
+
|
| 20 |
+
st.subheader("Import File List")
|
| 21 |
+
if "stored_docs" in st.session_state:
|
| 22 |
+
logger.info(st.session_state.stored_docs)
|
| 23 |
+
for docname in st.session_state.stored_docs:
|
| 24 |
+
st.write(docname)
|
requirements.txt
CHANGED
|
@@ -3,7 +3,8 @@ langchain>=0.0.217
|
|
| 3 |
openai
|
| 4 |
duckduckgo-search
|
| 5 |
anthropic
|
| 6 |
-
|
|
|
|
| 7 |
pypdf==3.9.0
|
| 8 |
faiss-cpu==1.7.4
|
| 9 |
html2text
|
|
@@ -11,5 +12,12 @@ streamlit-authenticator
|
|
| 11 |
extra_streamlit_components
|
| 12 |
requests_oauthlib
|
| 13 |
python-dotenv
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
llama_hub
|
| 15 |
msal-streamlit-authentication
|
|
|
|
|
|
|
|
|
| 3 |
openai
|
| 4 |
duckduckgo-search
|
| 5 |
anthropic
|
| 6 |
+
nltk
|
| 7 |
+
llama-index==0.8.4
|
| 8 |
pypdf==3.9.0
|
| 9 |
faiss-cpu==1.7.4
|
| 10 |
html2text
|
|
|
|
| 12 |
extra_streamlit_components
|
| 13 |
requests_oauthlib
|
| 14 |
python-dotenv
|
| 15 |
+
torch
|
| 16 |
+
transformers
|
| 17 |
+
python-pptx
|
| 18 |
+
Pillow
|
| 19 |
+
openpyxl
|
| 20 |
llama_hub
|
| 21 |
msal-streamlit-authentication
|
| 22 |
+
pdfminer.six
|
| 23 |
+
docx2txt
|