File size: 2,452 Bytes
1e917f4
 
 
 
 
 
 
0772e0c
1e917f4
f12caf6
1e917f4
 
 
 
 
77550f2
 
1e917f4
77550f2
1e917f4
77550f2
0772e0c
77550f2
 
ef419e8
77550f2
 
0772e0c
77550f2
 
 
 
 
 
0772e0c
77550f2
f12caf6
 
ab74dd0
3a520c6
77550f2
 
 
 
 
 
 
 
 
e0c1f02
77550f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0cd02ab
1b61cce
0cd02ab
3d70fc1
0cd02ab
 
3d70fc1
0cd02ab
 
 
77550f2
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import base64
import hashlib
import os

import cassio
import pdfplumber
import streamlit as st
from datasets import load_dataset
from langchain.embeddings import OpenAIEmbeddings
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain.llms import OpenAI
from langchain.memory import ConversationBufferWindowMemory
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.cassandra import Cassandra
from PyPDF2 import PdfReader
from typing_extensions import Concatenate


# Environment variables that must be set before the app can reach Astra DB and OpenAI.
_REQUIRED_ENV_VARS = ("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_ID", "OPENAI_API_KEY")

# st.session_state slots used to keep the built index across Streamlit reruns.
_INDEX_SLOT = "pdf_index"
_FINGERPRINT_SLOT = "pdf_index_fingerprint"


def _extract_pdf_text(pdf_file):
    """Return the concatenated text of every page in *pdf_file*.

    Pages for which ``extract_text()`` returns a falsy value are skipped.
    """
    pages = PdfReader(pdf_file).pages
    return "".join(filter(None, (page.extract_text() for page in pages)))


def _build_index(raw_text, embedding):
    """Replace the ``qa_mini_demo`` table contents with chunks of *raw_text*.

    Returns a ``VectorStoreIndexWrapper`` over the freshly populated store.
    ``cassio.init`` must already have been called, because the store is
    created with ``session=None`` and ``keyspace=None``.
    """
    store = Cassandra(
        embedding=embedding,
        table_name="qa_mini_demo",
        session=None,
        keyspace=None,
    )
    # Drop whatever a previous document left behind so answers only come
    # from the PDF being indexed now.
    # NOTE(review): the table name is fixed, so concurrent users presumably
    # share (and overwrite) the same table -- confirm whether that matters.
    store.delete_collection()

    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=800,
        chunk_overlap=200,
        length_function=len,
    )
    store.add_texts(splitter.split_text(raw_text))
    return VectorStoreIndexWrapper(vectorstore=store)


def main():
    """Render the PDF question-answering page.

    The user uploads a PDF; its text is chunked, embedded and stored in the
    Astra DB vector table, and a question typed by the user is answered by
    querying that index with an OpenAI LLM.

    Credentials are read from the environment variables listed in
    ``_REQUIRED_ENV_VARS`` instead of being embedded in the source.
    NOTE(review): any credentials that were previously committed in this
    file should be treated as compromised and rotated.
    """
    st.title("INTERACTION WITH PDF USING LLM")

    pdf_file = st.file_uploader("Upload PDF file", type=["pdf"])
    if pdf_file is None:
        return

    missing = [name for name in _REQUIRED_ENV_VARS if not os.environ.get(name)]
    if missing:
        st.error("Missing required environment variable(s): " + ", ".join(missing))
        return
    astra_token = os.environ["ASTRA_DB_APPLICATION_TOKEN"]
    astra_db_id = os.environ["ASTRA_DB_ID"]
    openai_api_key = os.environ["OPENAI_API_KEY"]

    # Streamlit re-executes this function on every widget interaction, so
    # only rebuild the index (delete + re-embed) when the uploaded bytes
    # actually change.
    fingerprint = hashlib.sha256(pdf_file.getvalue()).hexdigest()
    if st.session_state.get(_FINGERPRINT_SLOT) != fingerprint:
        raw_text = _extract_pdf_text(pdf_file)
        if not raw_text.strip():
            # e.g. an image-only PDF: nothing to embed, so leave the store alone.
            st.warning("No extractable text was found in this PDF.")
            return

        cassio.init(token=astra_token, database_id=astra_db_id)
        embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)
        st.session_state[_INDEX_SLOT] = _build_index(raw_text, embedding)
        st.session_state[_FINGERPRINT_SLOT] = fingerprint

    astra_vector_index = st.session_state[_INDEX_SLOT]
    llm = OpenAI(openai_api_key=openai_api_key)

    query_text = st.text_input("Enter your Question:").strip()
    submit = st.button("Generate")

    if submit:
        if not query_text:
            # Avoid spending an LLM call on an empty prompt.
            st.warning("Please enter a question first.")
            return
        answer = astra_vector_index.query(query_text, llm=llm).strip()
        st.write(f'\nANSWER :"{answer}"')
    
# Call main() only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()