Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,7 +3,7 @@ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
|
| 3 |
import os
|
| 4 |
import dotenv
|
| 5 |
from langchain_community.document_loaders import TextLoader, PyPDFLoader, CSVLoader, UnstructuredPowerPointLoader, UnstructuredWordDocumentLoader, UnstructuredExcelLoader
|
| 6 |
-
from langchain_text_splitters import TokenTextSplitter
|
| 7 |
from langchain_chroma import Chroma
|
| 8 |
from langchain.chains.combine_documents import create_stuff_documents_chain
|
| 9 |
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
|
@@ -26,6 +26,7 @@ with st.sidebar:
|
|
| 26 |
|
| 27 |
# Main app logic
|
| 28 |
if "OPENAI_API_KEY" in os.environ:
|
|
|
|
| 29 |
st.header('Multiple File Upload')
|
| 30 |
uploaded_files = st.file_uploader('Upload your files', accept_multiple_files=True, type=['txt', 'pdf', 'csv', 'ppt', 'doc', 'xls', 'pptx', 'xlsx'])
|
| 31 |
|
|
@@ -55,29 +56,23 @@ if "OPENAI_API_KEY" in os.environ:
|
|
| 55 |
os.unlink(temp_file_path)
|
| 56 |
return documents
|
| 57 |
|
| 58 |
-
|
| 59 |
-
|
| 60 |
all_documents = []
|
| 61 |
for file in uploaded_files:
|
| 62 |
all_documents.extend(load_file(file))
|
| 63 |
-
|
| 64 |
-
text_splitter = TokenTextSplitter(chunk_size=1000, chunk_overlap=200)
|
| 65 |
-
all_splits = text_splitter.split_documents(all_documents)
|
| 66 |
-
return all_splits
|
| 67 |
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
with st.spinner("Processing documents..."):
|
| 71 |
-
all_splits = process_documents(uploaded_files)
|
| 72 |
|
| 73 |
# Initialize components
|
| 74 |
@st.cache_resource
|
| 75 |
def initialize_components(_all_splits):
|
| 76 |
dotenv.load_dotenv()
|
| 77 |
-
chat = ChatOpenAI(model="gpt-3.5-turbo-
|
| 78 |
-
embeddings = OpenAIEmbeddings(model="text-embedding-
|
| 79 |
vectorstore = Chroma.from_documents(documents=_all_splits, embedding=embeddings)
|
| 80 |
-
retriever = vectorstore.as_retriever(
|
| 81 |
|
| 82 |
SYSTEM_TEMPLATE = """
|
| 83 |
You are an advanced AI assistant designed for document search and chatbot functionality. Your primary functions are:
|
|
@@ -159,19 +154,19 @@ if "OPENAI_API_KEY" in os.environ:
|
|
| 159 |
# Retrieve relevant documents
|
| 160 |
docs = retriever.get_relevant_documents(prompt)
|
| 161 |
|
| 162 |
-
# Get recent chat history
|
| 163 |
-
MAX_HISTORY = 5
|
| 164 |
-
recent_history = st.session_state.memory.load_memory_variables({})["chat_history"][-MAX_HISTORY:]
|
| 165 |
-
|
| 166 |
# Generate response
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
message_placeholder.markdown(full_response)
|
| 176 |
|
| 177 |
# Add assistant response to chat history
|
|
|
|
| 3 |
import os
|
| 4 |
import dotenv
|
| 5 |
from langchain_community.document_loaders import TextLoader, PyPDFLoader, CSVLoader, UnstructuredPowerPointLoader, UnstructuredWordDocumentLoader, UnstructuredExcelLoader
|
| 6 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 7 |
from langchain_chroma import Chroma
|
| 8 |
from langchain.chains.combine_documents import create_stuff_documents_chain
|
| 9 |
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
|
|
|
| 26 |
|
| 27 |
# Main app logic
|
| 28 |
if "OPENAI_API_KEY" in os.environ:
|
| 29 |
+
# with st.sidebar:
|
| 30 |
st.header('Multiple File Upload')
|
| 31 |
uploaded_files = st.file_uploader('Upload your files', accept_multiple_files=True, type=['txt', 'pdf', 'csv', 'ppt', 'doc', 'xls', 'pptx', 'xlsx'])
|
| 32 |
|
|
|
|
| 56 |
os.unlink(temp_file_path)
|
| 57 |
return documents
|
| 58 |
|
| 59 |
+
# Process uploaded files
|
| 60 |
+
if uploaded_files:
|
| 61 |
all_documents = []
|
| 62 |
for file in uploaded_files:
|
| 63 |
all_documents.extend(load_file(file))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
+
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
| 66 |
+
all_splits = text_splitter.split_documents(all_documents)
|
|
|
|
|
|
|
| 67 |
|
| 68 |
# Initialize components
|
| 69 |
@st.cache_resource
|
| 70 |
def initialize_components(_all_splits):
|
| 71 |
dotenv.load_dotenv()
|
| 72 |
+
chat = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0.2)
|
| 73 |
+
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
|
| 74 |
vectorstore = Chroma.from_documents(documents=_all_splits, embedding=embeddings)
|
| 75 |
+
retriever = vectorstore.as_retriever(k=4)
|
| 76 |
|
| 77 |
SYSTEM_TEMPLATE = """
|
| 78 |
You are an advanced AI assistant designed for document search and chatbot functionality. Your primary functions are:
|
|
|
|
| 154 |
# Retrieve relevant documents
|
| 155 |
docs = retriever.get_relevant_documents(prompt)
|
| 156 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
# Generate response
|
| 158 |
+
response = document_chain.invoke(
|
| 159 |
+
{
|
| 160 |
+
"context": docs,
|
| 161 |
+
"chat_history": st.session_state.memory.load_memory_variables({})["chat_history"],
|
| 162 |
+
"messages": [
|
| 163 |
+
HumanMessage(content=prompt)
|
| 164 |
+
],
|
| 165 |
+
}
|
| 166 |
+
)
|
| 167 |
+
|
| 168 |
+
# The response is already a string, so we can use it directly
|
| 169 |
+
full_response = response
|
| 170 |
message_placeholder.markdown(full_response)
|
| 171 |
|
| 172 |
# Add assistant response to chat history
|