# ChatWithPDF / src/streamlit_app.py
# zypchn's picture
# Update src/streamlit_app.py
# 315125b verified
### Chat With PDF ###
import os
from dotenv import load_dotenv
import streamlit as st
import cassio
from langchain_community.vectorstores import Cassandra
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain_community.llms import OpenAI
from langchain_openai import ChatOpenAI
from langchain.prompts.chat import ChatPromptTemplate
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from PyPDF2 import PdfReader
# Run load_dotenv() before the lookups below so that any values it adds to
# the process environment are visible to them.
load_dotenv()

# Astra DB connection settings; each one is None when its variable is unset.
ASTRADB_APP_TOKEN = os.environ.get("ASTRA_DB_TOKEN")
ASTRADB_ID = os.environ.get("ASTRA_DB_ID")
def read_file_and_chunk(pdf, *, chunk_size=400, chunk_overlap=100):
    """Extract the text of a PDF and split it into overlapping chunks.

    Args:
        pdf: The PDF to read; passed straight to ``PdfReader`` (the app
            hands in the Streamlit upload object).
        chunk_size: Maximum chunk length, measured with ``len``.
        chunk_overlap: Amount of text, measured with ``len``, that
            neighbouring chunks may share.

    Returns:
        The chunks produced by ``CharacterTextSplitter.split_text`` for the
        concatenated page text.
    """
    reader = PdfReader(pdf)

    # Pages for which extract_text() returns nothing are skipped. Page texts
    # are concatenated without a separator, exactly as before.
    page_texts = (page.extract_text() for page in reader.pages)
    raw_text = "".join(text for text in page_texts if text)

    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return splitter.split_text(raw_text)
def initialize_database(embedding=None):
    """Connect to Astra DB and return the vector store used by the app.

    Args:
        embedding: Embedding model handed to the ``Cassandra`` vector store.
            When omitted, the module-level ``embed`` object is used, so
            existing zero-argument calls keep working.

    Returns:
        A ``Cassandra`` vector store bound to the ``pdf_chat`` table.

    Raises:
        ValueError: If ``ASTRADB_APP_TOKEN`` or ``ASTRADB_ID`` is unset or
            empty.
    """
    # Fail early with an actionable message instead of handing None to the
    # driver and surfacing a less obvious connection error.
    if not ASTRADB_APP_TOKEN or not ASTRADB_ID:
        raise ValueError(
            "Astra DB credentials are missing: set ASTRA_DB_TOKEN and "
            "ASTRA_DB_ID in the environment or in a .env file."
        )

    cassio.init(
        token=ASTRADB_APP_TOKEN,
        database_id=ASTRADB_ID,
    )

    # session/keyspace are left as None, as in the original call
    # (presumably so the connection set up by cassio.init() is reused).
    return Cassandra(
        embedding=embed if embedding is None else embedding,
        table_name="pdf_chat",
        session=None,
        keyspace=None,
    )
def load_to_db(texts, vector_store):
    """Insert *texts* into *vector_store* and wrap the store for querying.

    Args:
        texts: The text chunks to add to the store.
        vector_store: Store object exposing ``add_texts``.

    Returns:
        A ``VectorStoreIndexWrapper`` built around ``vector_store``.
    """
    # Add the chunks first, then build the wrapper around the same store.
    vector_store.add_texts(texts)
    return VectorStoreIndexWrapper(vectorstore=vector_store)
# Initialize Streamlit app
st.set_page_config(page_title="Chat With PDF")
st.header("Ask Questions About Your Documents")

# Prefer the key typed into the page; otherwise fall back to the
# OPENAI_API_KEY environment variable.
OPENAI_API_KEY = (
    st.text_input("OpenAI API Key: ", type="password")
    or os.getenv("OPENAI_API_KEY", "")
)
if not OPENAI_API_KEY:
    # The OpenAI clients cannot be built without a key; end this run here
    # and wait for the user to provide one.
    st.info("Enter your OpenAI API key to get started.")
    st.stop()

llm = OpenAI(openai_api_key=OPENAI_API_KEY)
# `embed` stays a module-level name because initialize_database() reads it.
embed = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

uploaded_file = st.file_uploader("Upload your PDF file")
if uploaded_file is not None:
    # Streamlit re-runs this whole script on every interaction. Remember
    # which upload has already been indexed so the PDF is not re-read and
    # re-inserted into the database on each rerun.
    file_signature = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("indexed_file") != file_signature:
        st.write("Reading and indexing your PDF, this may take a moment...")
        try:
            chunks = read_file_and_chunk(uploaded_file)
            astra_vector_store = initialize_database()
            st.session_state["vector_index"] = load_to_db(
                chunks, astra_vector_store
            )
            st.session_state["indexed_file"] = file_signature
        except Exception as e:
            # Top-level UI boundary: report the failure and drop any stale
            # index so a later query cannot run against the wrong document.
            st.session_state.pop("vector_index", None)
            st.session_state.pop("indexed_file", None)
            st.error(f"Could not index the PDF: {e}")

    astra_vector_index = st.session_state.get("vector_index")
    # Only offer the query UI once an index exists; previously a failed
    # indexing step led to a NameError when "Ask" was pressed.
    if astra_vector_index is not None:
        user_query = st.text_input("Query: ", key="input")
        submit = st.button("Ask")
        if submit:
            if user_query.strip():
                answer = astra_vector_index.query(user_query, llm=llm).strip()
                st.subheader("Answer:")
                st.write(answer)
            else:
                st.warning("Please enter a question first.")