ARBAJSSHAIKH committed on
Commit
1e917f4
·
verified ·
1 Parent(s): d67d243

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -0
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pdfplumber
3
+ import base64
4
+
5
def main():
    """Render the Streamlit page: upload a PDF and display its extracted text."""
    st.title("PDF Viewer App")

    # Upload PDF file
    pdf_file = st.file_uploader("Upload PDF file", type=["pdf"])
    if pdf_file is None:
        # Nothing has been uploaded yet, so there is nothing to display.
        return

    # Display PDF content.
    # The text comes from a user-supplied document and is therefore untrusted:
    # it is rendered with st.markdown's default (HTML escaped) instead of
    # unsafe_allow_html=True, so markup embedded in the PDF cannot be injected
    # into the page.
    pdf_content = read_pdf(pdf_file)
    st.markdown(pdf_content)
15
+
16
def read_pdf(file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file: Whatever ``pdfplumber.open`` accepts; ``main`` passes the
            uploaded file object.

    Returns:
        A single string with the extracted text of all pages joined without
        a separator. Pages that yield no text contribute nothing.
    """
    with pdfplumber.open(file) as pdf:
        # A page without extractable text may yield None (or an empty string,
        # depending on the pdfplumber version); `or ""` keeps the
        # concatenation from failing with a TypeError in that case.
        return "".join(page.extract_text() or "" for page in pdf.pages)
23
+
24
# Start the Streamlit page only when this file is run as a script.
if __name__ == "__main__":
    main()
26
+
27
+
28
+
29
+ from langchain.llms import OpenAI
30
+ from langchain.vectorstores.cassandra import Cassandra
31
+ from langchain.indexes.vectorstore import VectorStoreIndexWrapper
32
+ from langchain.embeddings import OpenAIEmbeddings
33
+
34
+ from datasets import load_dataset
35
+
36
+ import cassio
37
+
38
+ from PyPDF2 import PdfReader
39
+
40
+
41
import os


def _require_env(name):
    """Return the value of environment variable *name*, failing fast if unset."""
    value = os.environ.get(name)
    if not value:
        raise RuntimeError(
            f"Environment variable {name} must be set; "
            "credentials are not stored in the source file."
        )
    return value


# SECURITY: credentials are read from the environment so that no secret is
# committed to version control.
# NOTE(review): the token and API key that were previously hard-coded here
# have been published with this commit and must be revoked and re-issued.
ASTRA_DB_APPLICATION_TOKEN = _require_env("ASTRA_DB_APPLICATION_TOKEN")
ASTRA_DB_ID = _require_env("ASTRA_DB_ID")
OPENAI_API_KEY = _require_env("OPENAI_API_KEY")
44
+
45
# Open the source document whose text will be indexed.
pdfreader = PdfReader("budget_speech.pdf")
# NOTE(review): Concatenate is not used in this section; the import is kept
# in place in case other parts of the file rely on it.
from typing_extensions import Concatenate

# Join the text of all pages, skipping pages that yield no text.
raw_text = "".join(
    page_text
    for page_text in (pdf_page.extract_text() for pdf_page in pdfreader.pages)
    if page_text
)
54
+
55
+
56
# Initialise cassio with the Astra DB token and database id.
cassio.init(
    token=ASTRA_DB_APPLICATION_TOKEN,
    database_id=ASTRA_DB_ID,
)

# OpenAI completion model and embedding model, both using the same API key.
llm = OpenAI(openai_api_key=OPENAI_API_KEY)
embedding = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# Vector store backed by the 'qa_mini_demo' table.
# session and keyspace are passed as None -- presumably so the connection
# configured by cassio.init() above is used; TODO confirm.
astra_vector_store = Cassandra(
    embedding=embedding,
    table_name="qa_mini_demo",
    session=None,
    keyspace=None,
)
66
+
67
+
68
from langchain.text_splitter import CharacterTextSplitter

# Split the document on newlines into chunks of up to 800 characters
# (measured with len), with 200 characters of overlap between chunks.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=800,
    chunk_overlap=200,
    length_function=len,
)
texts = text_splitter.split_text(raw_text)

# Add the chunks to the vector store, then wrap the store for querying.
astra_vector_store.add_texts(texts)
astra_vector_index = VectorStoreIndexWrapper(vectorstore=astra_vector_store)
82
+
83
+
84
# Interactive question/answer loop over the indexed document.
# The first prompt differs from later ones; typing "quit" (any case) ends
# the loop and blank input simply re-prompts.
first_question = True
while True:
    prompt = (
        "\nEnter your Question or type quit to end:"
        if first_question
        else "\nWhat is your next question:"
    )
    query_text = input(prompt).strip()

    if query_text.lower() == "quit":
        break
    if not query_text:
        continue

    # Only a real (non-empty, non-quit) question switches the prompt wording.
    first_question = False

    print("\nQUESTION :\"%s\"" % query_text)
    answer = astra_vector_index.query(query_text, llm=llm).strip()
    print("\nANSWER :\"%s\"" % answer)