naotakigawa committed
Commit 3291faa · 1 Parent(s): 875054f

Upload 11 files
app.py CHANGED
@@ -2,28 +2,23 @@ import streamlit as st
 import os
 import pickle
 import faiss
-import logging
+import common
 
 from multiprocessing import Lock
 from multiprocessing.managers import BaseManager
 from llama_index.callbacks import CallbackManager, LlamaDebugHandler
-from llama_index import VectorStoreIndex, Document,Prompt, SimpleDirectoryReader, ServiceContext, StorageContext, load_index_from_storage
-from llama_index.chat_engine import CondenseQuestionChatEngine;
+from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext, StorageContext, load_index_from_storage
 from llama_index.node_parser import SimpleNodeParser
 from llama_index.langchain_helpers.text_splitter import TokenTextSplitter
 from llama_index.constants import DEFAULT_CHUNK_OVERLAP
-from llama_index.response_synthesizers import get_response_synthesizer
 from llama_index.vector_stores.faiss import FaissVectorStore
 from llama_index.graph_stores import SimpleGraphStore
 from llama_index.storage.docstore import SimpleDocumentStore
 from llama_index.storage.index_store import SimpleIndexStore
 from msal_streamlit_authentication import msal_authentication
 import tiktoken
-
-from requests_oauthlib import OAuth2Session
-from time import time
+from llama_index.callbacks import CallbackManager, LlamaDebugHandler
 from dotenv import load_dotenv
-from streamlit import net_util
 
 load_dotenv()
@@ -40,44 +35,27 @@ AUTHORITY = f"https://login.microsoftonline.com/{TENANT_ID}"
 REDIRECT_URI = os.environ["REDIRECT_URI"]
 SCOPES = ["openid", "profile", "User.Read"]
 
-index_name = "./data/storage"
-pkl_name = "./data/stored_documents.pkl"
-
-custom_prompt = Prompt("""\
-以下はこれまでの会話履歴と、ドキュメントを検索して回答する必要がある、ユーザーからの会話文です。
-会話と新しい会話文に基づいて、検索クエリを作成します。回答は日本語で行います。
-新しい会話文が挨拶の場合、挨拶を返してください。
-新しい会話文が質問の場合、検索した結果の回答を返してください。
-答えがわからない場合は正直にわからないと回答してください。
-会話履歴:
-{chat_history}
-新しい会話文:
-{question}
-Search query:
-""")
-
-chat_history = []
-
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger("__name__")
-logger.debug("調査用ログ")
+index_name = os.environ["INDEX_NAME"]
+pkl_name = os.environ["PKL_NAME"]
+st.session_state.llama_debug_handler = LlamaDebugHandler()
+from log import logger
 
 def initialize_index():
     logger.info("initialize_index start")
-    text_splitter = TokenTextSplitter(separator="。", chunk_size=1500
+    text_splitter = TokenTextSplitter(chunk_size=1500
         , chunk_overlap=DEFAULT_CHUNK_OVERLAP
-        , tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode)
+        , tokenizer=tiktoken.encoding_for_model("gpt-4").encode)
     node_parser = SimpleNodeParser(text_splitter=text_splitter)
     d = 1536
     k=2
     faiss_index = faiss.IndexFlatL2(d)
     # デバッグ用
-    llama_debug_handler = LlamaDebugHandler()
-    callback_manager = CallbackManager([llama_debug_handler])
+    callback_manager = CallbackManager([st.session_state.llama_debug_handler])
     service_context = ServiceContext.from_defaults(node_parser=node_parser,callback_manager=callback_manager)
     lock = Lock()
     with lock:
         if os.path.exists(index_name):
+            logger.info("start import index")
             storage_context = StorageContext.from_defaults(
                 docstore=SimpleDocumentStore.from_persist_dir(persist_dir=index_name),
                 graph_store=SimpleGraphStore.from_persist_dir(persist_dir=index_name),
@@ -85,29 +63,17 @@ def initialize_index():
                 index_store=SimpleIndexStore.from_persist_dir(persist_dir=index_name),
             )
             st.session_state.index = load_index_from_storage(storage_context=storage_context,service_context=service_context)
-            response_synthesizer = get_response_synthesizer(response_mode='refine')
-            st.session_state.query_engine = st.session_state.index.as_query_engine(response_synthesizer=response_synthesizer,service_context=service_context)
-            st.session_state.chat_engine = CondenseQuestionChatEngine.from_defaults(
-                query_engine=st.session_state.query_engine,
-                condense_question_prompt=custom_prompt,
-                chat_history=chat_history,
-                verbose=True
-            )
+            common.setChatEngine()
         else:
+            logger.info("start create index")
             documents = SimpleDirectoryReader("./documents").load_data()
             vector_store = FaissVectorStore(faiss_index=faiss_index)
             storage_context = StorageContext.from_defaults(vector_store=vector_store)
             st.session_state.index = VectorStoreIndex.from_documents(documents, storage_context=storage_context,service_context=service_context)
             st.session_state.index.storage_context.persist(persist_dir=index_name)
-            response_synthesizer = get_response_synthesizer(response_mode='refine')
-            st.session_state.query_engine = st.session_state.index.as_query_engine(response_synthesizer=response_synthesizer,service_context=service_context)
-            st.session_state.chat_engine = CondenseQuestionChatEngine.from_defaults(
-                query_engine=st.session_state.query_engine,
-                condense_question_prompt=custom_prompt,
-                chat_history=chat_history,
-                verbose=True
-            )
+            common.setChatEngine()
         if os.path.exists(pkl_name):
+            logger.info(pkl_name)
            with open(pkl_name, "rb") as f:
                st.session_state.stored_docs = pickle.load(f)
        else:
@@ -139,8 +105,9 @@ st.session_state["login_token"] = msal_authentication(
     html_id="html_id_for_button", # Optional, defaults to None. Corresponds to HTML id.
     #key=1 # Optional if only a single instance is needed
 )
-st.write("Recevied login token:", st.session_state.login_token)
+# st.write("Recevied login token:", st.session_state.login_token)
 
 if st.session_state.login_token:
     initialize_index()
-    st.write("ようこそ", st.session_state.login_token["account"]["name"])
+    st.write("ようこそ", st.session_state.login_token["account"]["name"])
+    st.write("サイドメニューからファイルインポート又はChatbotへの質問を開始してください。")
common.py CHANGED
@@ -1,19 +1,24 @@
 import streamlit as st
-import logging
 import os
+import pickle
+import ipaddress
+import tiktoken
 
-from time import time
-from requests_oauthlib import OAuth2Session
-
-from time import time
-# from requests_oauthlib import OAuth2Session
+from pathlib import Path
 from streamlit import runtime
 from streamlit.runtime.scriptrunner import get_script_run_ctx
-import ipaddress
 from streamlit.web.server.websocket_headers import _get_websocket_headers
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger("__name__")
-logger.debug("調査用ログ")
+from llama_index import SimpleDirectoryReader
+from llama_index import Prompt
+from llama_index.chat_engine import CondenseQuestionChatEngine;
+from llama_index.response_synthesizers import get_response_synthesizer
+from llama_index import ServiceContext, SimpleDirectoryReader
+from llama_index.node_parser import SimpleNodeParser
+from llama_index.langchain_helpers.text_splitter import TokenTextSplitter
+from llama_index.constants import DEFAULT_CHUNK_OVERLAP
+from llama_index.response_synthesizers import get_response_synthesizer
+from llama_index.callbacks import CallbackManager
+from log import logger
 
 # 接続元制御
 ALLOW_IP_ADDRESS = os.environ["ALLOW_IP_ADDRESS"]
@@ -31,21 +36,14 @@ SCOPES = ["openid", "profile", "User.Read"]
 # 接続元IP取得
 def get_remote_ip():
     ctx = get_script_run_ctx()
-    logger.info("ctx")
-    logger.info(ctx)
     session_info = runtime.get_instance().get_client(ctx.session_id)
-    logger.info("session_info")
-    logger.info(session_info)
     headers = _get_websocket_headers()
-    logger.info("headers")
-    logger.info(headers)
     return session_info.request.remote_ip, headers.get("X-Forwarded-For")
 
 # 接続元IP許可判定
 def is_allow_ip_address():
     remote_ip, x_forwarded_for = get_remote_ip()
-    logger.info("remote_ip")
-    logger.info(remote_ip)
+    logger.info("remote_ip:"+remote_ip)
     if x_forwarded_for is not None:
         remote_ip = x_forwarded_for
     # localhost
@@ -54,8 +52,7 @@ def is_allow_ip_address():
 
     # プライベートIP
     ipaddr = ipaddress.IPv4Address(remote_ip)
-    logger.info("ipaddr")
-    logger.info(ipaddr)
+    logger.info("ipaddr:"+str(ipaddr))
     if ipaddr.is_private:
         return True
 
@@ -70,3 +67,128 @@ def check_login():
     if "login_token" not in st.session_state or not st.session_state.login_token:
         st.warning("**ログインしてください**")
         st.stop()
+
+
+index_name = os.environ["INDEX_NAME"]
+pkl_name = os.environ["PKL_NAME"]
+# デバッグ用
+text_splitter = TokenTextSplitter( chunk_size=1500
+    , chunk_overlap=DEFAULT_CHUNK_OVERLAP
+    , tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode)
+node_parser = SimpleNodeParser(text_splitter=text_splitter)
+custom_prompt = Prompt("""\
+以下はこれまでの会話履歴と、ドキュメントを検索して回答する必要がある、ユーザーからの会話文です。
+会話と新しい会話文に基づいて、検索クエリを作成します。
+挨拶された場合、挨拶を返してください。
+質問された場合、検索した結果の回答を返してください。
+答えを知らない場合は、「わかりません」と回答してください。
+全ての回答は日本語で行ってください。
+会話履歴:
+{chat_history}
+新しい会話文:
+{question}
+Search query:
+""")
+
+chat_history = []
+def fileImportChatEngine(uploaded_file):
+    filepath = None
+    try:
+        filepath = os.path.join('documents', os.path.basename( uploaded_file.name))
+        logger.info(filepath)
+        with open(filepath, 'wb') as f:
+            f.write(uploaded_file.getvalue())
+            f.close()
+        document = SimpleDirectoryReader(input_files=[filepath]).load_data()[0]
+        st.session_state.stored_docs.append(uploaded_file.name)
+        logger.info(st.session_state.stored_docs)
+        st.session_state.index.insert(document=document)
+        st.session_state.index.storage_context.persist(persist_dir=index_name)
+        setChatEngine()
+        with open(pkl_name, "wb") as f:
+            print("pickle")
+            pickle.dump(st.session_state.stored_docs, f)
+        st.session_state["file_uploader_key"] += 1
+        st.experimental_rerun()
+    except Exception as e:
+        # cleanup temp file
+        logger.error(e)
+        if filepath is not None and os.path.exists(filepath):
+            os.remove(filepath)
+
+def fileImportChatEngineCustomloader(uploaded_file,loader):
+    filepath = None
+    try:
+        filepath = os.path.join('documents', os.path.basename( uploaded_file.name))
+        logger.info(filepath)
+        with open(filepath, 'wb') as f:
+            f.write(uploaded_file.getvalue())
+            f.close()
+        document = loader.load_data(file=Path(filepath))[0]
+        st.session_state.stored_docs.append(uploaded_file.name)
+        logger.info(st.session_state.stored_docs)
+        st.session_state.index.insert(document=document)
+        st.session_state.index.storage_context.persist(persist_dir=index_name)
+        setChatEngine()
+        with open(pkl_name, "wb") as f:
+            print("pickle")
+            pickle.dump(st.session_state.stored_docs, f)
+        st.session_state["file_uploader_key"] += 1
+        st.experimental_rerun()
+    except Exception as e:
+        # cleanup temp file
+        logger.error(e)
+        if filepath is not None and os.path.exists(filepath):
+            os.remove(filepath)
+
+def setChatEngine():
+    callback_manager = CallbackManager([st.session_state.llama_debug_handler])
+    service_context = ServiceContext.from_defaults(node_parser=node_parser,callback_manager=callback_manager)
+    response_synthesizer = get_response_synthesizer(response_mode='refine')
+    st.session_state.query_engine = st.session_state.index.as_query_engine(
+        response_synthesizer=response_synthesizer,
+        service_context=service_context,
+    )
+    st.session_state.chat_engine = CondenseQuestionChatEngine.from_defaults(
+        query_engine=st.session_state.query_engine,
+        condense_question_prompt=custom_prompt,
+        chat_history=chat_history,
+        verbose=True
+    )
+
+# chat mode reacの記述
+# from langchain.prompts.chat import (
+#     ChatPromptTemplate,
+#     HumanMessagePromptTemplate,
+#     SystemMessagePromptTemplate,
+# )
+# from llama_index.prompts import Prompt
+# chat_text_qa_msgs = [
+#     SystemMessagePromptTemplate.from_template(
+#         "文脈が役に立たない場合でも、必ず質問に答えてください。"
+#     ),
+#     HumanMessagePromptTemplate.from_template(
+#         "以下に、コンテキスト情報を提供します。 \n"
+#         "---------------------\n"
+#         "{context_str}"
+#         "\n---------------------\n"
+#         "回答には以下を含めてください。\n"
+#         "・最初に問い合わせへのお礼してください\n"
+#         "・自己紹介してください\n"
+#         "・質問内容を要約してください\n"
+#         "・最後に不明な点がないか確認してください \n"
+#         "この情報を踏まえて、次の質問に回答してください: {query_str}\n"
+#         "答えを知らない場合は、「わからない」と回答してください。また、日本語で回答してください。"
+#     ),
+# ]
+# def setChatEngine():
+#     callback_manager = CallbackManager([st.session_state.llama_debug_handler])
+#     service_context = ServiceContext.from_defaults(node_parser=node_parser,callback_manager=callback_manager)
+#     response_synthesizer = get_response_synthesizer(response_mode='refine')
+#     st.session_state.chat_engine = st.session_state.index.as_chat_engine(
+#         response_synthesizer=response_synthesizer,
+#         service_context=service_context,
+#         chat_mode="react",
+#         text_qa_template= Prompt.from_langchain_prompt(ChatPromptTemplate.from_messages(chat_text_qa_msgs)),
+#         verbose=True
+#     )
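
Note: the new setChatEngine() helper reads st.session_state.llama_debug_handler and st.session_state.index, both of which app.py populates before any page calls it. A minimal sketch of the assumed call order (the guard is illustrative, not part of the commit):

    import streamlit as st
    import common

    # app.py sets llama_debug_handler at import time and index in initialize_index()
    if "index" in st.session_state and "llama_debug_handler" in st.session_state:
        common.setChatEngine()   # rebuilds st.session_state.query_engine and chat_engine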
log.py ADDED
@@ -0,0 +1,5 @@
+import logging
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("__name__")
+
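
Note: logging.getLogger("__name__") registers a logger literally named "__name__", so every module importing from log shares one logger. If per-module logger names were intended, the unquoted form would be needed (a sketch of that variant, not what the commit does):

    logger = logging.getLogger(__name__)   # named "log" when this module is imported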
pages/Chatbot.py CHANGED
@@ -1,33 +1,11 @@
 
 import streamlit as st
-import logging
-
-from llama_index import Prompt
-
 import common
+import os
 
-index_name = "./data/storage"
-pkl_name = "./data/stored_documents.pkl"
-
-custom_prompt = Prompt("""\
-以下はこれまでの会話履歴と、ドキュメントを検索して回答する必要がある、ユーザーからの会話文です。
-会話と新しい会話文に基づいて、検索クエリを作成します。回答は日本語で行います。
-新しい会話文が挨拶の場合、挨拶を返してください。
-新しい会話文が質問の場合、検索した結果の回答を返してください。
-答えがわからない場合は正直にわからないと回答してください。
-会話履歴:
-{chat_history}
-新しい会話文:
-{question}
-Search query:
-""")
-
-chat_history = []
-
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger("__name__")
-logger.debug("調査用ログ")
-
+index_name = os.environ["INDEX_NAME"]
+pkl_name = os.environ["PKL_NAME"]
+from log import logger
 common.check_login()
 
 st.title("💬 Chatbot")
@@ -47,9 +25,7 @@ if prompt := st.chat_input():
     st.session_state.messages.append({"role": "user", "content": prompt})
     st.chat_message("user").write(prompt)
     response = st.session_state.chat_engine.chat(prompt)
+    # logger.info(st.session_state.llama_debug_handler.get_llm_inputs_outputs()[-1][-1])
     msg = str(response)
     st.session_state.messages.append({"role": "assistant", "content": msg})
     st.chat_message("assistant").write(msg)
-
-
-
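
Note: the commented-out line relies on the LlamaDebugHandler that app.py now keeps in session state; when enabled it would log the last recorded LLM input/output event. A debugging sketch along the same lines (illustrative only, with a guard for the empty case):

    pairs = st.session_state.llama_debug_handler.get_llm_inputs_outputs()
    if pairs:
        logger.info(pairs[-1][-1])   # most recent event of the most recent LLM call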
 
 
 
pages/ChatbotWebRead.py CHANGED
@@ -1,19 +1,21 @@
 
 import streamlit as st
 import faiss
-import logging
-
-from llama_index.callbacks import CallbackManager, LlamaDebugHandler
-from llama_index import Prompt, ServiceContext
-from llama_index.chat_engine import CondenseQuestionChatEngine;
+import langchain
+from llama_index.callbacks import CallbackManager
+from llama_index import ServiceContext,VectorStoreIndex
+from llama_index.chat_engine import CondenseQuestionChatEngine
 from llama_index.node_parser import SimpleNodeParser
 from llama_index.langchain_helpers.text_splitter import TokenTextSplitter
 from llama_index.constants import DEFAULT_CHUNK_OVERLAP
 from llama_index.response_synthesizers import get_response_synthesizer
-from llama_index import ListIndex, SimpleWebPageReader
+from llama_index import SimpleWebPageReader
 
+# from llama_index.prompts import Prompt
+from llama_index import Prompt
 import tiktoken
 import common
+langchain.verbose = True
 
 custom_prompt = Prompt("""\
 以下はこれまでの会話履歴と、ドキュメントを検索して回答する必要がある、ユーザーからの会話文です。
@@ -27,13 +29,9 @@ custom_prompt = Prompt("""\
 {question}
 Search query:
 """)
-
 chat_history = []
 
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger("__name__")
-logger.debug("調査用ログ")
-
+from log import logger
 
 common.check_login()
 
@@ -45,27 +43,29 @@ URLtext = st.text_input(
 )
 
 if st.button("URL reading",use_container_width=True):
-    text_splitter = TokenTextSplitter(separator="。", chunk_size=1500
+    text_splitter = TokenTextSplitter( chunk_size=1500
         , chunk_overlap=DEFAULT_CHUNK_OVERLAP
         , tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode)
     node_parser = SimpleNodeParser(text_splitter=text_splitter)
     d = 1536
    k=2
    faiss_index = faiss.IndexFlatL2(d)
-    # デバッグ用
-    llama_debug_handler = LlamaDebugHandler()
-    callback_manager = CallbackManager([llama_debug_handler])
+
+    callback_manager = CallbackManager([st.session_state.llama_debug_handler])
     service_context = ServiceContext.from_defaults(node_parser=node_parser,callback_manager=callback_manager)
 
     webDocuments = SimpleWebPageReader(html_to_text=True).load_data(
         [URLtext]
     )
     logger.info(webDocuments)
-    webIndex = ListIndex.from_documents(webDocuments,service_context=service_context)
-    response_synthesizer = get_response_synthesizer(response_mode='compact')
-    webQuery_engine = webIndex.as_query_engine(response_synthesizer=response_synthesizer,service_context=service_context)
+    webIndex = VectorStoreIndex.from_documents(webDocuments,service_context=service_context)
+    response_synthesizer = get_response_synthesizer(response_mode='refine')
+    st.session_state.webQuery_engine = webIndex.as_query_engine(
+        response_synthesizer=response_synthesizer,
+        service_context=service_context,
+    )
     st.session_state.web_chat_engine = CondenseQuestionChatEngine.from_defaults(
-        query_engine=webQuery_engine,
+        query_engine=st.session_state.webQuery_engine,
         condense_question_prompt=custom_prompt,
         chat_history=chat_history,
         verbose=True
@@ -87,8 +87,7 @@ if prompt := st.chat_input(disabled = not URLtext):
     st.session_state.webmessages.append({"role": "user", "content": prompt})
     st.chat_message("user").write(prompt)
     response = st.session_state.web_chat_engine.chat(prompt)
+    logger.debug(st.session_state.llama_debug_handler.get_llm_inputs_outputs())
     msg = str(response)
     st.session_state.webmessages.append({"role": "assistant", "content": msg})
     st.chat_message("assistant").write(msg)
-
-
pages/ImportExcelFile.py CHANGED
@@ -1,80 +1,21 @@
 import streamlit as st
-import anthropic
-from pathlib import Path
-from llama_index import download_loader,Prompt
-import os
-import pickle
-import logging
 import common
 
-from llama_index.chat_engine import CondenseQuestionChatEngine;
-from llama_index.response_synthesizers import get_response_synthesizer
-
-index_name = "./data/storage"
-pkl_name = "./data/stored_documents.pkl"
-
-custom_prompt = Prompt("""\
-以下はこれまでの会話履歴と、ドキュメントを検索して回答する必要がある、ユーザーからの会話文です。
-会話と新しい会話文に基づいて、検索クエリを作成します。回答は日本語で行います。
-新しい会話文が挨拶の場合、挨拶を返してください。
-新しい会話文が質問の場合、検索した結果の回答を返してください。
-答えがわからない場合は正直にわからないと回答してください。
-会話履歴:
-{chat_history}
-新しい会話文:
-{question}
-Search query:
-""")
-
-chat_history = []
-
-
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger("__name__")
-logger.debug("調査用ログ")
+from llama_hub.file.pandas_excel.base import PandasExcelReader
+from log import logger
 
 common.check_login()
 
-PandasExcelReader = download_loader("PandasExcelReader")
 loader = PandasExcelReader(pandas_config={"header": 0})
 
 if "file_uploader_key" not in st.session_state:
     st.session_state["file_uploader_key"] = 0
 
-st.title("📝 ImportPptxFile")
+st.title("📝 ImportExcelFile")
 uploaded_file = st.file_uploader("Upload an article", type=("xlsx"))
 
 if st.button("import",use_container_width=True):
-    filepath = None
-    try:
-        filepath = os.path.join('documents', os.path.basename( uploaded_file.name))
-        logger.info(filepath)
-        with open(filepath, 'wb') as f:
-            f.write(uploaded_file.getvalue())
-            f.close()
-        document = loader.load_data(file=filepath)[0]
-        st.session_state.stored_docs.append(uploaded_file.name)
-        logger.info(st.session_state.stored_docs)
-        st.session_state.index.insert(document=document)
-        st.session_state.index.storage_context.persist(persist_dir=index_name)
-        response_synthesizer = get_response_synthesizer(response_mode='refine')
-        st.session_state.query_engine = st.session_state.index.as_query_engine(response_synthesizer=response_synthesizer)
-        st.session_state.chat_engine = CondenseQuestionChatEngine.from_defaults(
-            query_engine=st.session_state.query_engine,
-            condense_question_prompt=custom_prompt,
-            chat_history=chat_history,
-            verbose=True
-        )
-        with open(pkl_name, "wb") as f:
-            print("pickle")
-            pickle.dump(st.session_state.stored_docs, f)
-        st.session_state["file_uploader_key"] += 1
-        st.experimental_rerun()
-    except Exception as e:
-        # cleanup temp file
-        logger.error(e)
-        if filepath is not None and os.path.exists(filepath):
-            os.remove(filepath)
+    common.fileImportChatEngineCustomloader(uploaded_file,loader)
 
 st.subheader("Import File List")
 if "stored_docs" in st.session_state:
pages/ImportFile.py CHANGED
@@ -1,39 +1,7 @@
-import openai
 import streamlit as st
-import os
-import pickle
-import logging
-
-from llama_index import SimpleDirectoryReader
-from llama_index.chat_engine import CondenseQuestionChatEngine;
-from llama_index.response_synthesizers import get_response_synthesizer
-from llama_index import Prompt, SimpleDirectoryReader
-
-from logging import getLogger, StreamHandler, Formatter
-
 import common
 
-index_name = "./data/storage"
-pkl_name = "./data/stored_documents.pkl"
-
-custom_prompt = Prompt("""\
-以下はこれまでの会話履歴と、ドキュメントを検索して回答する必要がある、ユーザーからの会話文です。
-会話と新しい会話文に基づいて、検索クエリを作成します。回答は日本語で行います。
-新しい会話文が挨拶の場合、挨拶を返してください。
-新しい会話文が質問の場合、検索した結果の回答を返してください。
-答えがわからない場合は正直にわからないと回答してください。
-会話履歴:
-{chat_history}
-新しい会話文:
-{question}
-Search query:
-""")
-
-chat_history = []
-
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger("__name__")
-logger.debug("調査用ログ")
+from log import logger
 
 common.check_login()
 
@@ -42,39 +10,9 @@ if "file_uploader_key" not in st.session_state:
 
 st.title("📝 ImportFile")
 
-uploaded_file = st.file_uploader("Upload an article", type=("txt", "md","pdf"),key=st.session_state["file_uploader_key"])
+uploaded_file = st.file_uploader("Upload an article", type=("txt", "md"),key=st.session_state["file_uploader_key"])
 if st.button("import",use_container_width=True):
-    filepath = None
-    try:
-        filepath = os.path.join('documents', os.path.basename( uploaded_file.name))
-        logger.info(filepath)
-        with open(filepath, 'wb') as f:
-            f.write(uploaded_file.getvalue())
-            f.close()
-        document = SimpleDirectoryReader(input_files=[filepath]).load_data()[0]
-        logger.info(document)
-        st.session_state.stored_docs.append(uploaded_file.name)
-        logger.info(st.session_state.stored_docs)
-        st.session_state.index.insert(document=document)
-        st.session_state.index.storage_context.persist(persist_dir=index_name)
-        response_synthesizer = get_response_synthesizer(response_mode='refine')
-        st.session_state.query_engine = st.session_state.index.as_query_engine(response_synthesizer=response_synthesizer)
-        st.session_state.chat_engine = CondenseQuestionChatEngine.from_defaults(
-            query_engine=st.session_state.query_engine,
-            condense_question_prompt=custom_prompt,
-            chat_history=chat_history,
-            verbose=True
-        )
-        with open(pkl_name, "wb") as f:
-            print("pickle")
-            pickle.dump(st.session_state.stored_docs, f)
-        st.session_state["file_uploader_key"] += 1
-        st.experimental_rerun()
-    except Exception as e:
-        # cleanup temp file
-        logger.error(e)
-        if filepath is not None and os.path.exists(filepath):
-            os.remove(filepath)
+    common.fileImportChatEngine(uploaded_file)
 
 st.subheader("Import File List")
 if "stored_docs" in st.session_state:
pages/ImportPdfFile.py ADDED
@@ -0,0 +1,24 @@
+import streamlit as st
+import common
+
+from llama_hub.file.cjk_pdf.base import CJKPDFReader
+from log import logger
+
+common.check_login()
+
+loader = CJKPDFReader()
+
+if "file_uploader_key" not in st.session_state:
+    st.session_state["file_uploader_key"] = 0
+
+st.title("📝 ImportPdfFile")
+uploaded_file = st.file_uploader("Upload an article", type=("pdf"))
+
+if st.button("import",use_container_width=True):
+    common.fileImportChatEngineCustomloader(uploaded_file,loader)
+
+st.subheader("Import File List")
+if "stored_docs" in st.session_state:
+    logger.info(st.session_state.stored_docs)
+    for docname in st.session_state.stored_docs:
+        st.write(docname)
pages/ImportPptxFile.py CHANGED
@@ -1,41 +1,11 @@
 import streamlit as st
-import anthropic
-from pathlib import Path
-from llama_index import download_loader,Prompt
-import os
-import pickle
-import logging
 import common
 
-from llama_index.chat_engine import CondenseQuestionChatEngine;
-from llama_index.response_synthesizers import get_response_synthesizer
-
-index_name = "./data/storage"
-pkl_name = "./data/stored_documents.pkl"
-
-custom_prompt = Prompt("""\
-以下はこれまでの会話履歴と、ドキュメントを検索して回答する必要がある、ユーザーからの会話文です。
-会話と新しい会話文に基づいて、検索クエリを作成します。回答は日本語で行います。
-新しい会話文が挨拶の場合、挨拶を返してください。
-新しい会話文が質問の場合、検索した結果の回答を返してください。
-答えがわからない場合は正直にわからないと回答してください。
-会話履歴:
-{chat_history}
-新しい会話文:
-{question}
-Search query:
-""")
-
-chat_history = []
-
-
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger("__name__")
-logger.debug("調査用ログ")
+from llama_hub.file.pptx.base import PptxReader
+from log import logger
 
 common.check_login()
 
-PptxReader = download_loader("PptxReader")
 loader = PptxReader()
 
 if "file_uploader_key" not in st.session_state:
@@ -45,36 +15,7 @@ st.title("📝 ImportPptxFile")
 uploaded_file = st.file_uploader("Upload an article", type=("pptx"))
 
 if st.button("import",use_container_width=True):
-    filepath = None
-    try:
-        filepath = os.path.join('documents', os.path.basename( uploaded_file.name))
-        logger.info(filepath)
-        with open(filepath, 'wb') as f:
-            f.write(uploaded_file.getvalue())
-            f.close()
-        document = loader.load_data(file=filepath)[0]
-        st.session_state.stored_docs.append(uploaded_file.name)
-        logger.info(st.session_state.stored_docs)
-        st.session_state.index.insert(document=document)
-        st.session_state.index.storage_context.persist(persist_dir=index_name)
-        response_synthesizer = get_response_synthesizer(response_mode='refine')
-        st.session_state.query_engine = st.session_state.index.as_query_engine(response_synthesizer=response_synthesizer)
-        st.session_state.chat_engine = CondenseQuestionChatEngine.from_defaults(
-            query_engine=st.session_state.query_engine,
-            condense_question_prompt=custom_prompt,
-            chat_history=chat_history,
-            verbose=True
-        )
-        with open(pkl_name, "wb") as f:
-            print("pickle")
-            pickle.dump(st.session_state.stored_docs, f)
-        st.session_state["file_uploader_key"] += 1
-        st.experimental_rerun()
-    except Exception as e:
-        # cleanup temp file
-        logger.error(e)
-        if filepath is not None and os.path.exists(filepath):
-            os.remove(filepath)
+    common.fileImportChatEngineCustomloader(uploaded_file,loader)
 
 st.subheader("Import File List")
 if "stored_docs" in st.session_state:
pages/ImportWordFile.py ADDED
@@ -0,0 +1,24 @@
+import streamlit as st
+import common
+
+from llama_hub.file.docx.base import DocxReader
+from log import logger
+
+common.check_login()
+
+loader = DocxReader()
+
+if "file_uploader_key" not in st.session_state:
+    st.session_state["file_uploader_key"] = 0
+
+st.title("📝 ImportWordFile")
+uploaded_file = st.file_uploader("Upload an article", type=("docx"))
+
+if st.button("import",use_container_width=True):
+    common.fileImportChatEngineCustomloader(uploaded_file,loader)
+
+st.subheader("Import File List")
+if "stored_docs" in st.session_state:
+    logger.info(st.session_state.stored_docs)
+    for docname in st.session_state.stored_docs:
+        st.write(docname)
requirements.txt CHANGED
@@ -3,7 +3,8 @@ langchain>=0.0.217
 openai
 duckduckgo-search
 anthropic
-llama-index==0.7.4
+nltk
+llama-index==0.8.4
 pypdf==3.9.0
 faiss-cpu==1.7.4
 html2text
@@ -11,5 +12,12 @@ streamlit-authenticator
 extra_streamlit_components
 requests_oauthlib
 python-dotenv
+torch
+transformers
+python-pptx
+Pillow
+openpyxl
 llama_hub
 msal-streamlit-authentication
+pdfminer.six
+docx2txt
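
Note: the new pins appear to back the new readers (an inference from the import pages above, not stated in the commit): openpyxl for PandasExcelReader, python-pptx plus torch, transformers and Pillow for PptxReader's slide and image handling, pdfminer.six for CJKPDFReader, and docx2txt for DocxReader; nltk accompanies the llama-index 0.7.4 → 0.8.4 upgrade.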