Spaces:
Sleeping
Sleeping
File size: 2,452 Bytes
1e917f4 0772e0c 1e917f4 f12caf6 1e917f4 77550f2 1e917f4 77550f2 1e917f4 77550f2 0772e0c 77550f2 ef419e8 77550f2 0772e0c 77550f2 0772e0c 77550f2 f12caf6 ab74dd0 3a520c6 77550f2 e0c1f02 77550f2 0cd02ab 1b61cce 0cd02ab 3d70fc1 0cd02ab 3d70fc1 0cd02ab 77550f2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
import base64
import hashlib
import os

import cassio
import pdfplumber
import streamlit as st
from datasets import load_dataset
from langchain.embeddings import OpenAIEmbeddings
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain.llms import OpenAI
from langchain.memory import ConversationBufferWindowMemory
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.cassandra import Cassandra
from PyPDF2 import PdfReader
from typing_extensions import Concatenate
# Names of the environment variables that must hold the service credentials.
# Secrets are deliberately NOT stored in source code.
_CREDENTIAL_ENV_VARS = (
    "ASTRA_DB_APPLICATION_TOKEN",
    "ASTRA_DB_ID",
    "OPENAI_API_KEY",
)

# Astra DB table that holds the embedded chunks of the current PDF.
# NOTE(review): the table name is fixed, so concurrent sessions share (and
# overwrite) the same table -- confirm whether per-session tables are needed.
_TABLE_NAME = "qa_mini_demo"


def _load_credentials():
    """Read the required credentials from the environment.

    Returns:
        A ``(credentials, missing)`` tuple: ``credentials`` maps each
        variable name to its stripped value, ``missing`` lists the names
        that are unset or blank.
    """
    credentials = {
        name: os.environ.get(name, "").strip() for name in _CREDENTIAL_ENV_VARS
    }
    missing = [name for name, value in credentials.items() if not value]
    return credentials, missing


def _extract_pdf_text(pdf_file):
    """Return the concatenated text of every page of ``pdf_file``.

    Pages for which ``extract_text()`` yields nothing are skipped.
    """
    reader = PdfReader(pdf_file)
    page_texts = (page.extract_text() for page in reader.pages)
    return "".join(text for text in page_texts if text)


def _build_index(raw_text, credentials):
    """Embed ``raw_text`` into the Astra DB table and return a query index.

    The table is emptied first so answers come only from the current PDF.
    """
    cassio.init(
        token=credentials["ASTRA_DB_APPLICATION_TOKEN"],
        database_id=credentials["ASTRA_DB_ID"],
    )
    embedding = OpenAIEmbeddings(openai_api_key=credentials["OPENAI_API_KEY"])
    vector_store = Cassandra(
        embedding=embedding,
        table_name=_TABLE_NAME,
        session=None,
        keyspace=None,
    )
    vector_store.delete_collection()

    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=800,
        chunk_overlap=200,
        length_function=len,
    )
    vector_store.add_texts(splitter.split_text(raw_text))
    return VectorStoreIndexWrapper(vectorstore=vector_store)


def main():
    """Streamlit app: upload a PDF, index it, and answer questions about it.

    Credentials are read from the ``ASTRA_DB_APPLICATION_TOKEN``,
    ``ASTRA_DB_ID`` and ``OPENAI_API_KEY`` environment variables; the app
    shows an error and stops if any of them is missing.
    """
    st.title("INTERACTION WITH PDF USING LLM")
    pdf_file = st.file_uploader("Upload PDF file", type=["pdf"])
    if pdf_file is None:
        return

    credentials, missing = _load_credentials()
    if missing:
        st.error("Missing required environment variable(s): " + ", ".join(missing))
        return

    # Streamlit reruns this script on every widget interaction. Index each
    # uploaded PDF only once per session (keyed by its content hash) instead
    # of wiping the table and re-embedding every chunk on each rerun.
    pdf_digest = hashlib.sha256(pdf_file.getvalue()).hexdigest()
    if st.session_state.get("indexed_pdf_digest") != pdf_digest:
        raw_text = _extract_pdf_text(pdf_file)
        if not raw_text.strip():
            st.error("No extractable text was found in this PDF.")
            return
        st.session_state["pdf_index"] = _build_index(raw_text, credentials)
        st.session_state["indexed_pdf_digest"] = pdf_digest
    astra_vector_index = st.session_state["pdf_index"]

    query_text = st.text_input("Enter your Question:").strip()
    if not st.button('Generate'):
        return
    if not query_text:
        # Avoid spending an LLM call on an empty question.
        st.warning("Please enter a question first.")
        return

    llm = OpenAI(openai_api_key=credentials["OPENAI_API_KEY"])
    answer = astra_vector_index.query(query_text, llm=llm).strip()
    st.write(f'\nANSWER :"{answer}"')
# Launch the app only when this file is executed as a script
# (e.g. via `streamlit run`), not when it is imported.
if __name__ == "__main__":
    main()