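The script below is a single-file Streamlit app: it accepts an uploaded .txt file, splits it into 500-character chunks, embeds the chunks into an in-memory Chroma vector store, and answers questions with a RetrievalQA "stuff" chain over gpt-4o, keeping each question, answer, and its source chunks in st.session_state so the conversation history survives reruns.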
import os

import chromadb
import streamlit as st
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.docstore.document import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

# Clear Chroma's cached system client so Streamlit reruns don't collide with a stale instance
chromadb.api.client.SharedSystemClient.clear_system_cache()

# OpenAI credentials are read from the environment (e.g. a Space secret);
# default to "" so a missing variable doesn't raise a TypeError here
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY", "")

# Initialize the embeddings and model
embd = OpenAIEmbeddings()
llm = ChatOpenAI(model_name="gpt-4o", temperature=0)

# Initialize conversation history
if "conversation_history" not in st.session_state:
    st.session_state.conversation_history = []

# Define the Streamlit app
st.title("Text File Question-Answering with History")
st.subheader("Upload a text file and ask questions. The app will maintain a conversation history.")

# File upload section
uploaded_file = st.file_uploader("Upload a text file", type=["txt"])

if uploaded_file:
    # Read and decode the content of the uploaded file
    file_content = uploaded_file.read().decode("utf-8")

    # Convert the content into a LangChain document
    document = [Document(page_content=file_content)]

    # Split the loaded document into chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
    doc_splits = text_splitter.split_documents(document)

    # Create an in-memory vector store (persist_directory=None, so the index
    # is rebuilt from the upload on every run)
    vectorstore = Chroma.from_documents(
        documents=doc_splits,
        collection_name="conversation_history",
        embedding=embd,
        persist_directory=None,
    )
    retriever = vectorstore.as_retriever()

    # Initialize the QA chain ("stuff" packs all retrieved chunks into a single prompt)
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
    )

    # Question-answering section
    query = st.text_input("Ask a question:")
    if query:
        # Process the query
        result = qa_chain({"query": query})
        answer = result["result"]
        sources = result["source_documents"]

        # Append to conversation history
        st.session_state.conversation_history.append((query, answer, sources))

        # Display the current answer
        st.write("**Answer:**", answer)

        # Display the sources
        st.subheader("Source Documents")
        for i, doc in enumerate(sources, start=1):
            st.write(f"**Source {i}:** {doc.metadata.get('source', 'Unknown Source')}")
            st.write(doc.page_content[:500])  # Display the first 500 characters of the source content

# Display conversation history
st.subheader("Conversation History")
for idx, (q, a, s) in enumerate(st.session_state.conversation_history, 1):
    st.write(f"**Q{idx}:** {q}")
    st.write(f"**A{idx}:** {a}")
    st.write(f"**Sources for Q{idx}:**")
    for i, doc in enumerate(s, start=1):
        st.write(f"**Source {i}:** {doc.metadata.get('source', 'Unknown Source')}")
        st.write(doc.page_content[:300])  # Show a snippet for brevity
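To try this locally, save the script (assumed here to be named app.py) and launch it with streamlit run app.py. With the import paths used above, the dependencies are roughly pip install streamlit langchain langchain-community chromadb openai tiktoken, though the exact set depends on your LangChain version (newer releases move OpenAIEmbeddings and ChatOpenAI into langchain_openai). OPENAI_API_KEY must be set in the environment before the app starts.

One easy variation: because the store is created with persist_directory=None, the index lives only in memory and is re-embedded on every upload. A minimal sketch of persisting it instead, assuming any writable local path such as ./chroma_db (older Chroma releases may also need an explicit vectorstore.persist() call):

    vectorstore = Chroma.from_documents(
        documents=doc_splits,
        collection_name="conversation_history",
        embedding=embd,
        persist_directory="./chroma_db",  # assumption: any writable local path
    )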