Update app.py
Browse files
app.py
CHANGED
|
@@ -13,7 +13,7 @@ load_dotenv()
|
|
| 13 |
|
| 14 |
icons = {"assistant": "robot.png", "user": "man-kddi.png"}
|
| 15 |
|
| 16 |
-
# Configure the Llama index settings for Google/Gemma-7B-IT model and English embedding
|
| 17 |
Settings.llm = HuggingFaceInferenceAPI(
|
| 18 |
model_name="google/gemma-7b-it",
|
| 19 |
tokenizer_name="google/gemma-7b-it",
|
|
@@ -24,7 +24,7 @@ Settings.llm = HuggingFaceInferenceAPI(
|
|
| 24 |
)
|
| 25 |
|
| 26 |
Settings.embed_model = HuggingFaceEmbedding(
|
| 27 |
-
model_name="BAAI/bge-small-en-v1.5" # English embedding model; adjust if Italian model is available
|
| 28 |
)
|
| 29 |
|
| 30 |
# Define the directory for persistent storage and data
|
|
@@ -43,12 +43,12 @@ def displayPDF(file):
|
|
| 43 |
|
| 44 |
def displayDOCX(file):
|
| 45 |
text = docx2txt.process(file)
|
| 46 |
-
st.text_area("
|
| 47 |
|
| 48 |
def displayTXT(file):
|
| 49 |
with open(file, "r") as f:
|
| 50 |
text = f.read()
|
| 51 |
-
st.text_area("
|
| 52 |
|
| 53 |
def data_ingestion():
|
| 54 |
documents = SimpleDirectoryReader(DATA_DIR).load_data()
|
|
@@ -62,10 +62,10 @@ def handle_query(query):
|
|
| 62 |
chat_text_qa_msgs = [
|
| 63 |
(
|
| 64 |
"user",
|
| 65 |
-
"""
|
| 66 |
-
|
| 67 |
{context_str}
|
| 68 |
-
|
| 69 |
{query_str}
|
| 70 |
"""
|
| 71 |
)
|
|
@@ -79,14 +79,14 @@ def handle_query(query):
|
|
| 79 |
elif isinstance(answer, dict) and 'response' in answer:
|
| 80 |
return answer['response']
|
| 81 |
else:
|
| 82 |
-
return "
|
| 83 |
|
| 84 |
# Streamlit app initialization
|
| 85 |
-
st.title("
|
| 86 |
-
st.markdown("
|
| 87 |
|
| 88 |
if 'messages' not in st.session_state:
|
| 89 |
-
st.session_state.messages = [{'role': 'assistant', "content": '
|
| 90 |
|
| 91 |
for message in st.session_state.messages:
|
| 92 |
with st.chat_message(message['role'], avatar=icons[message['role']]):
|
|
@@ -94,9 +94,9 @@ for message in st.session_state.messages:
|
|
| 94 |
|
| 95 |
with st.sidebar:
|
| 96 |
st.title("Menu:")
|
| 97 |
-
uploaded_file = st.file_uploader("
|
| 98 |
-
if st.button("
|
| 99 |
-
with st.spinner("
|
| 100 |
file_extension = os.path.splitext(uploaded_file.name)[1].lower()
|
| 101 |
filepath = os.path.join(DATA_DIR, "uploaded_file" + file_extension)
|
| 102 |
with open(filepath, "wb") as f:
|
|
@@ -110,9 +110,9 @@ with st.sidebar:
|
|
| 110 |
displayTXT(filepath)
|
| 111 |
|
| 112 |
data_ingestion() # Process file every time a new file is uploaded
|
| 113 |
-
st.success("
|
| 114 |
|
| 115 |
-
user_prompt = st.text_input("
|
| 116 |
|
| 117 |
if user_prompt and uploaded_file:
|
| 118 |
st.session_state.messages.append({'role': 'user', "content": user_prompt})
|
|
@@ -120,7 +120,7 @@ if user_prompt and uploaded_file:
|
|
| 120 |
st.write(user_prompt)
|
| 121 |
|
| 122 |
# Trigger assistant's response retrieval and update UI
|
| 123 |
-
with st.spinner("
|
| 124 |
response = handle_query(user_prompt)
|
| 125 |
with st.chat_message("assistant", avatar=icons["assistant"]):
|
| 126 |
st.write(response)
|
|
|
|
| 13 |
|
| 14 |
icons = {"assistant": "robot.png", "user": "man-kddi.png"}
|
| 15 |
|
| 16 |
+
# Configure the LlamaIndex settings for the Google Gemma-7B-IT model and an English embedding model
|
| 17 |
Settings.llm = HuggingFaceInferenceAPI(
|
| 18 |
model_name="google/gemma-7b-it",
|
| 19 |
tokenizer_name="google/gemma-7b-it",
|
|
|
|
| 24 |
)
|
| 25 |
|
| 26 |
Settings.embed_model = HuggingFaceEmbedding(
|
| 27 |
+
model_name="BAAI/bge-small-en-v1.5" # English embedding model; adjust if an Italian model is available
|
| 28 |
)
|
| 29 |
|
| 30 |
# Define the directory for persistent storage and data
|
|
|
|
| 43 |
|
| 44 |
def displayDOCX(file):
    """Render the text content of a DOCX file in the Streamlit page.

    Args:
        file: The DOCX file handed to ``docx2txt.process``.

    The extracted text is shown in a text area labelled
    "Document Content" with a height of 400.
    """
    extracted_text = docx2txt.process(file)
    st.text_area(label="Document Content", value=extracted_text, height=400)
|
| 47 |
|
| 48 |
def displayTXT(file):
    """Render the content of a plain-text file in the Streamlit page.

    Args:
        file: Path of the text file to open and display.

    The file is decoded explicitly as UTF-8 instead of relying on the
    platform's locale encoding, and bytes that cannot be decoded are
    replaced rather than raising ``UnicodeDecodeError``, so the preview
    behaves the same on every host and never aborts on a stray byte.
    The text is shown in a text area labelled "Document Content" with a
    height of 400.
    """
    # errors="replace" keeps the preview best-effort: undecodable bytes
    # show up as U+FFFD instead of crashing the page.
    with open(file, "r", encoding="utf-8", errors="replace") as f:
        text = f.read()
    st.text_area("Document Content", text, height=400)
|
| 52 |
|
| 53 |
def data_ingestion():
|
| 54 |
documents = SimpleDirectoryReader(DATA_DIR).load_data()
|
|
|
|
| 62 |
chat_text_qa_msgs = [
|
| 63 |
(
|
| 64 |
"user",
|
| 65 |
+
"""You are a Q&A assistant named CHAT-DOC. Your main goal is to provide answers as accurately as possible, based on the instructions and context given to you. If a question does not match the provided context or is outside the scope of the document, kindly advise the user to ask questions within the context of the document.
|
| 66 |
+
Context:
|
| 67 |
{context_str}
|
| 68 |
+
Question:
|
| 69 |
{query_str}
|
| 70 |
"""
|
| 71 |
)
|
|
|
|
| 79 |
elif isinstance(answer, dict) and 'response' in answer:
|
| 80 |
return answer['response']
|
| 81 |
else:
|
| 82 |
+
return "Sorry, I couldn't find an answer."
|
| 83 |
|
| 84 |
# Streamlit app initialization
|
| 85 |
+
st.title("Chat with Your Document 📄")
|
| 86 |
+
st.markdown("Chat here👇")
|
| 87 |
|
| 88 |
if 'messages' not in st.session_state:
|
| 89 |
+
st.session_state.messages = [{'role': 'assistant', "content": 'Hello! Upload a PDF, DOCX, or TXT file and ask me anything about its content.'}]
|
| 90 |
|
| 91 |
for message in st.session_state.messages:
|
| 92 |
with st.chat_message(message['role'], avatar=icons[message['role']]):
|
|
|
|
| 94 |
|
| 95 |
with st.sidebar:
|
| 96 |
st.title("Menu:")
|
| 97 |
+
uploaded_file = st.file_uploader("Upload your document (PDF, DOCX, TXT)", type=["pdf", "docx", "txt"])
|
| 98 |
+
if st.button("Submit & Process") and uploaded_file:
|
| 99 |
+
with st.spinner("Processing..."):
|
| 100 |
file_extension = os.path.splitext(uploaded_file.name)[1].lower()
|
| 101 |
filepath = os.path.join(DATA_DIR, "uploaded_file" + file_extension)
|
| 102 |
with open(filepath, "wb") as f:
|
|
|
|
| 110 |
displayTXT(filepath)
|
| 111 |
|
| 112 |
data_ingestion() # Process file every time a new file is uploaded
|
| 113 |
+
st.success("Done")
|
| 114 |
|
| 115 |
+
user_prompt = st.text_input("Ask me anything about the content of the document:")
|
| 116 |
|
| 117 |
if user_prompt and uploaded_file:
|
| 118 |
st.session_state.messages.append({'role': 'user', "content": user_prompt})
|
|
|
|
| 120 |
st.write(user_prompt)
|
| 121 |
|
| 122 |
# Trigger assistant's response retrieval and update UI
|
| 123 |
+
with st.spinner("Thinking..."):
|
| 124 |
response = handle_query(user_prompt)
|
| 125 |
with st.chat_message("assistant", avatar=icons["assistant"]):
|
| 126 |
st.write(response)
|