ARBAJSSHAIKH committed on
Commit
77550f2
·
verified ·
1 Parent(s): 75e83db

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -85
app.py CHANGED
@@ -1,31 +1,6 @@
1
  import streamlit as st
2
  import pdfplumber
3
  import base64
4
-
5
def main():
    """Render the PDF viewer page.

    Shows a title and a PDF upload widget; once a file has been uploaded,
    its extracted text is displayed below the uploader.
    """
    st.title("PDF Viewer App")

    # Upload PDF file
    pdf_file = st.file_uploader("Upload PDF file", type=["pdf"])
    if pdf_file is None:
        # Nothing uploaded yet on this run; there is nothing to display.
        return

    # Display PDF content.
    # The text comes from a user-supplied file, so it is untrusted: rely on
    # Streamlit's default HTML escaping rather than unsafe_allow_html=True,
    # which would inject any markup contained in the PDF into the page.
    pdf_content = read_pdf(pdf_file)
    st.markdown(pdf_content)
15
-
16
def read_pdf(file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file: Whatever ``pdfplumber.open`` accepts; the caller in this file
            passes the object returned by ``st.file_uploader``.

    Returns:
        A single string with the pages' text joined without a separator.
        Pages that yield no text contribute nothing.
    """
    with pdfplumber.open(file) as pdf:
        # NOTE(review): extract_text() appears to return None for pages
        # without a text layer in some pdfplumber versions; `or ""` keeps
        # such pages from raising TypeError during concatenation.
        return "".join(page.extract_text() or "" for page in pdf.pages)


if __name__ == "__main__":
    main()
26
-
27
-
28
-
29
  from langchain.llms import OpenAI
30
  from langchain.vectorstores.cassandra import Cassandra
31
  from langchain.indexes.vectorstore import VectorStoreIndexWrapper
@@ -38,65 +13,101 @@ import cassio
38
  from PyPDF2 import PdfReader
39
 
40
 
41
import os

from langchain.text_splitter import CharacterTextSplitter
from typing_extensions import Concatenate  # not used by this script; kept as-is

# Credentials are read from the environment instead of being embedded in
# source control. A missing variable raises KeyError at start-up, before
# any network call is attempted.
ASTRA_DB_APPLICATION_TOKEN = os.environ["ASTRA_DB_APPLICATION_TOKEN"]
ASTRA_DB_ID = os.environ["ASTRA_DB_ID"]
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

# Collect the text of every page; pages with no extractable text are skipped.
pdfreader = PdfReader("budget_speech.pdf")
raw_text = "".join(page.extract_text() or "" for page in pdfreader.pages)

cassio.init(token=ASTRA_DB_APPLICATION_TOKEN, database_id=ASTRA_DB_ID)

llm = OpenAI(openai_api_key=OPENAI_API_KEY)
embedding = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# NOTE(review): session/keyspace are left as None, presumably so the store
# uses the connection configured by cassio.init() above - confirm.
astra_vector_store = Cassandra(
    embedding=embedding,
    table_name='qa_mini_demo',
    session=None,
    keyspace=None,
)

# Split on newlines into chunks of at most 800 characters, overlapping by 200.
text_splitter = CharacterTextSplitter(
    separator='\n',
    chunk_size=800,
    chunk_overlap=200,
    length_function=len,
)
texts = text_splitter.split_text(raw_text)

astra_vector_store.add_texts(texts)
astra_vector_index = VectorStoreIndexWrapper(vectorstore=astra_vector_store)

# Interactive question loop: "quit" (any case) ends it, blank input re-prompts.
prompt = "\nEnter your Question or type quit to end:"
while True:
    query_text = input(prompt).strip()

    if query_text.lower() == 'quit':
        break
    if query_text == '':
        continue

    # Switch to the follow-up prompt only after the first real question.
    prompt = "\nWhat is your next question:"

    print("\nQUESTION :\"%s\"" % query_text)
    answer = astra_vector_index.query(query_text, llm=llm).strip()
    print("\nANSWER :\"%s\"" % answer)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import pdfplumber
3
  import base64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  from langchain.llms import OpenAI
5
  from langchain.vectorstores.cassandra import Cassandra
6
  from langchain.indexes.vectorstore import VectorStoreIndexWrapper
 
13
  from PyPDF2 import PdfReader
14
 
15
 
16
# Not used by the code below; retained so the module's imports are unchanged.
from typing_extensions import Concatenate


def _require_setting(name):
    """Return the value of environment variable *name*.

    When the variable is unset or empty, an error is shown on the page and
    the current script run is halted. Reading credentials this way keeps
    them out of the source file.
    """
    import os

    value = os.environ.get(name)
    if not value:
        st.error(f"Missing required environment variable: {name}")
        st.stop()
    return value


def _build_index(pdf_file):
    """Store the uploaded PDF's text in the vector table and wrap it for querying.

    Args:
        pdf_file: The uploaded file object handed to ``PdfReader``.

    Returns:
        A ``(index, llm)`` tuple: the ``VectorStoreIndexWrapper`` over the
        ``qa_mini_demo`` table and the ``OpenAI`` LLM used to answer queries.
    """
    from langchain.text_splitter import CharacterTextSplitter

    openai_api_key = _require_setting("OPENAI_API_KEY")
    cassio.init(
        token=_require_setting("ASTRA_DB_APPLICATION_TOKEN"),
        database_id=_require_setting("ASTRA_DB_ID"),
    )

    # Collect the text of every page; pages with no extractable text are skipped.
    reader = PdfReader(pdf_file)
    raw_text = "".join(page.extract_text() or "" for page in reader.pages)

    # Split on newlines into chunks of at most 800 characters, overlapping by 200.
    splitter = CharacterTextSplitter(
        separator='\n',
        chunk_size=800,
        chunk_overlap=200,
        length_function=len,
    )
    texts = splitter.split_text(raw_text)
    if not texts:
        # E.g. a scanned PDF with no text layer: there is nothing to embed.
        st.warning("No text could be extracted from this PDF.")
        st.stop()

    llm = OpenAI(openai_api_key=openai_api_key)
    embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)

    # NOTE(review): session/keyspace are left as None, presumably so the store
    # uses the connection configured by cassio.init() above - confirm.
    astra_vector_store = Cassandra(
        embedding=embedding,
        table_name='qa_mini_demo',
        session=None,
        keyspace=None,
    )
    astra_vector_store.add_texts(texts)

    return VectorStoreIndexWrapper(vectorstore=astra_vector_store), llm


def main():
    """Render the page: upload a PDF, then answer a question about it.

    Streamlit re-executes this function from the top on every widget
    interaction, so there is no input loop here: each run handles at most
    one question, and the user asks the next one by editing the text box.
    """
    st.title("INTERACTION WITH PDF USING LLM")

    # Upload PDF file
    pdf_file = st.file_uploader("Upload PDF file", type=["pdf"])
    if pdf_file is None:
        return

    # Build the index once per uploaded file and keep it in session state;
    # otherwise every rerun would re-embed the PDF and insert its chunks
    # into the table again.
    file_key = (pdf_file.name, pdf_file.size)
    if st.session_state.get("indexed_file") != file_key:
        index, llm = _build_index(pdf_file)
        st.session_state["qa_index"] = index
        st.session_state["qa_llm"] = llm
        st.session_state["indexed_file"] = file_key

    query_text = st.text_input("Enter your Question or type quit to end:").strip()

    # An empty box (the initial state) or "quit" means there is nothing to ask.
    if query_text == '' or query_text.lower() == 'quit':
        return

    st.write("\nQUESTION :\"%s\"" % query_text)

    answer = st.session_state["qa_index"].query(
        query_text, llm=st.session_state["qa_llm"]
    ).strip()

    st.write("\nANSWER :\"%s\"" % answer)


if __name__ == "__main__":
    main()