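The script below is a single-file Streamlit app: it accepts an uploaded .txt file, splits it into 500-character chunks, embeds the chunks into an in-memory Chroma vector store, and answers questions with a RetrievalQA "stuff" chain over gpt-4o, keeping each question, answer, and its source chunks in st.session_state so the conversation history survives reruns.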
import os

import chromadb
import streamlit as st
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.docstore.document import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

# Clear Chroma's cached system client so Streamlit reruns don't collide with a stale instance
chromadb.api.client.SharedSystemClient.clear_system_cache()

# OpenAI credentials are read from the environment (e.g. a Space secret);
# default to "" so a missing variable doesn't raise a TypeError here
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY", "")

# Initialize the embeddings and model
embd = OpenAIEmbeddings()
llm = ChatOpenAI(model_name="gpt-4o", temperature=0)

# Initialize conversation history
if "conversation_history" not in st.session_state:
    st.session_state.conversation_history = []

# Define the Streamlit app
st.title("Text File Question-Answering with History")
st.subheader("Upload a text file and ask questions. The app will maintain a conversation history.")

# File upload section
uploaded_file = st.file_uploader("Upload a text file", type=["txt"])

if uploaded_file:
    # Read and decode the content of the uploaded file
    file_content = uploaded_file.read().decode("utf-8")

    # Convert the content into a LangChain document
    document = [Document(page_content=file_content)]

    # Split the loaded document into chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
    doc_splits = text_splitter.split_documents(document)

    # Create an in-memory vector store (persist_directory=None, so the index
    # is rebuilt from the upload on every run)
    vectorstore = Chroma.from_documents(
        documents=doc_splits,
        collection_name="conversation_history",
        embedding=embd,
        persist_directory=None,
    )
    retriever = vectorstore.as_retriever()

    # Initialize the QA chain ("stuff" packs all retrieved chunks into a single prompt)
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
    )

    # Question-answering section
    query = st.text_input("Ask a question:")
    if query:
        # Process the query
        result = qa_chain({"query": query})
        answer = result["result"]
        sources = result["source_documents"]

        # Append to conversation history
        st.session_state.conversation_history.append((query, answer, sources))

        # Display the current answer
        st.write("**Answer:**", answer)

        # Display the sources
        st.subheader("Source Documents")
        for i, doc in enumerate(sources, start=1):
            st.write(f"**Source {i}:** {doc.metadata.get('source', 'Unknown Source')}")
            st.write(doc.page_content[:500])  # Display the first 500 characters of the source content

# Display conversation history
st.subheader("Conversation History")
for idx, (q, a, s) in enumerate(st.session_state.conversation_history, 1):
    st.write(f"**Q{idx}:** {q}")
    st.write(f"**A{idx}:** {a}")
    st.write(f"**Sources for Q{idx}:**")
    for i, doc in enumerate(s, start=1):
        st.write(f"**Source {i}:** {doc.metadata.get('source', 'Unknown Source')}")
        st.write(doc.page_content[:300])  # Show a snippet for brevity
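To try this locally, save the script (assumed here to be named app.py) and launch it with streamlit run app.py. With the import paths used above, the dependencies are roughly pip install streamlit langchain langchain-community chromadb openai tiktoken, though the exact set depends on your LangChain version (newer releases move OpenAIEmbeddings and ChatOpenAI into langchain_openai). OPENAI_API_KEY must be set in the environment before the app starts.

One easy variation: because the store is created with persist_directory=None, the index lives only in memory and is re-embedded on every upload. A minimal sketch of persisting it instead, assuming any writable local path such as ./chroma_db (older Chroma releases may also need an explicit vectorstore.persist() call):

    vectorstore = Chroma.from_documents(
        documents=doc_splits,
        collection_name="conversation_history",
        embedding=embd,
        persist_directory="./chroma_db",  # assumption: any writable local path
    )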