Spaces:
Sleeping
Sleeping
File size: 2,426 Bytes
315125b f96ce6a 315125b f96ce6a 315125b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
### Chat With PDF ###
import os
from dotenv import load_dotenv
import streamlit as st
import cassio
from langchain_community.vectorstores import Cassandra
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain_community.llms import OpenAI
from langchain_openai import ChatOpenAI
from langchain.prompts.chat import ChatPromptTemplate
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from PyPDF2 import PdfReader
# Load settings from a local .env file (python-dotenv) before reading them.
load_dotenv()

# Astra DB credentials; each is None when the variable is not set.
ASTRADB_APP_TOKEN = os.environ.get("ASTRA_DB_TOKEN")
ASTRADB_ID = os.environ.get("ASTRA_DB_ID")
def read_file_and_chunk(pdf, chunk_size=400, chunk_overlap=100):
    """Extract the text of a PDF and split it into overlapping chunks.

    Args:
        pdf: PDF source handed straight to ``PdfReader`` (the script in this
            file passes the object returned by ``st.file_uploader``).
        chunk_size: Maximum chunk length, measured with ``len`` (characters).
        chunk_overlap: Number of characters shared between adjacent chunks.

    Returns:
        The list of text chunks produced by
        ``CharacterTextSplitter.split_text``.
    """
    reader = PdfReader(pdf)

    # Pages whose extraction yields nothing (None or "") are skipped.
    # Page texts are concatenated with no separator, as before.
    page_texts = (page.extract_text() for page in reader.pages)
    raw_text = "".join(text for text in page_texts if text)

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return text_splitter.split_text(raw_text)
def initialize_database(embedding=None, table_name="pdf_chat"):
    """Initialise the Astra DB connection and build the vector store.

    Args:
        embedding: Embedding model given to the vector store. When omitted,
            the module-level ``embed`` object is used, so it must already be
            defined by the time this function is called.
        table_name: Name of the table backing the vector store.

    Returns:
        The ``Cassandra`` vector store instance.
    """
    if embedding is None:
        # Looked up at call time: ``embed`` is created by the script section
        # of this file, after this function is defined.
        embedding = embed

    cassio.init(token=ASTRADB_APP_TOKEN, database_id=ASTRADB_ID)

    # session/keyspace stay None as in the original call; presumably the
    # connection set up by cassio.init is used instead -- TODO confirm.
    return Cassandra(
        embedding=embedding,
        table_name=table_name,
        session=None,
        keyspace=None,
    )
def load_to_db(texts, vector_store):
    """Add ``texts`` to ``vector_store`` and wrap the store in an index.

    Args:
        texts: Text chunks passed to ``vector_store.add_texts``.
        vector_store: Store that receives the texts and backs the wrapper.

    Returns:
        A ``VectorStoreIndexWrapper`` built around ``vector_store``.
    """
    vector_store.add_texts(texts)
    return VectorStoreIndexWrapper(vectorstore=vector_store)
# Initialize Streamlit app
st.set_page_config(page_title="Chat With PDF")
st.header("Ask Questions About Your Documents")

# Prefer the key typed into the page; otherwise fall back to the environment
# (load_dotenv() has already run at the top of this file).
OPENAI_API_KEY = (
    st.text_input("OpenAI API Key: ", type="password")
    or os.getenv("OPENAI_API_KEY")
)
if not OPENAI_API_KEY:
    # Without a key the OpenAI clients cannot be used, so end this run
    # cleanly instead of failing further down.
    st.info("Enter your OpenAI API key to get started.")
    st.stop()

llm = OpenAI(openai_api_key=OPENAI_API_KEY)
embed = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# Stays None until an index for the currently uploaded file is available.
astra_vector_index = None

uploaded_file = st.file_uploader("Upload your PDF file")
if uploaded_file is not None:
    # Streamlit re-executes this script on every widget interaction. Keep the
    # index in session state so the same PDF is not re-embedded and
    # re-inserted into the database on each rerun (e.g. when "Ask" is clicked).
    file_signature = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("indexed_file") != file_signature:
        st.write("Reading and indexing your PDF, this may take a moment...")
        try:
            chunks = read_file_and_chunk(uploaded_file)
            astra_vector_store = initialize_database()
            new_index = load_to_db(chunks, astra_vector_store)
        except Exception as e:
            # UI boundary: show the failure on the page rather than crash.
            st.subheader(str(e))
        else:
            st.session_state["vector_index"] = new_index
            st.session_state["indexed_file"] = file_signature

    if st.session_state.get("indexed_file") == file_signature:
        astra_vector_index = st.session_state["vector_index"]

# Widget keys must be strings/ints; the original passed the builtin ``input``.
user_query = st.text_input("Query: ", key="input")
submit = st.button("Ask")
if submit:
    if astra_vector_index is None:
        # No file uploaded, or indexing failed: nothing to query against.
        st.warning("Please upload a PDF and wait for indexing to finish.")
    elif not user_query.strip():
        st.warning("Please enter a question.")
    else:
        answer = astra_vector_index.query(user_query, llm=llm).strip()
        st.subheader("Answer:")
        st.write(answer)
|