Spaces: Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,7 +5,7 @@ from langchain.llms import OpenAI
|
|
| 5 |
from langchain.vectorstores.cassandra import Cassandra
|
| 6 |
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
|
| 7 |
from langchain.embeddings import OpenAIEmbeddings
|
| 8 |
-
|
| 9 |
from datasets import load_dataset
|
| 10 |
|
| 11 |
import cassio
|
|
@@ -16,44 +16,23 @@ from PyPDF2 import PdfReader
|
|
| 16 |
def main():
|
| 17 |
st.title("INTERACTION WITH PDF USING LLM")
|
| 18 |
|
| 19 |
-
# Upload PDF file
|
| 20 |
pdf_file = st.file_uploader("Upload PDF file", type=["pdf"])
|
| 21 |
|
| 22 |
if pdf_file is not None:
|
| 23 |
-
|
| 24 |
-
# pdf_content = read_pdf(pdf_file)
|
| 25 |
-
# st.markdown(pdf_content, unsafe_allow_html=True)
|
| 26 |
-
|
| 27 |
-
#def read_pdf(file):
|
| 28 |
-
# with pdfplumber.open(file) as pdf:
|
| 29 |
-
# text = ""
|
| 30 |
-
# for page in pdf.pages:
|
| 31 |
-
# text += page.extract_text()
|
| 32 |
-
|
| 33 |
-
# return text
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
ASTRA_DB_APPLICATION_TOKEN="AstraCS:KRrILGTZHQMczBfoJhucdxkN:a6aaf66c8f7e318f1048bb13ec9132510c3fefc85501a5268cd873edd418ad10"
|
| 43 |
ASTRA_DB_ID="800e9596-9d6a-487d-a87c-b95436d8026a"
|
| 44 |
OPENAI_API_KEY="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"
|
| 45 |
|
| 46 |
pdfreader=PdfReader(pdf_file)
|
| 47 |
-
|
| 48 |
-
|
| 49 |
raw_text=''
|
| 50 |
|
| 51 |
for i ,page in enumerate(pdfreader.pages):
|
| 52 |
content=page.extract_text()
|
| 53 |
if content:
|
| 54 |
raw_text += content
|
| 55 |
-
|
| 56 |
-
|
| 57 |
cassio.init(token=ASTRA_DB_APPLICATION_TOKEN,database_id=ASTRA_DB_ID)
|
| 58 |
|
| 59 |
llm=OpenAI(openai_api_key=OPENAI_API_KEY)
|
|
@@ -88,21 +67,11 @@ def main():
|
|
| 88 |
|
| 89 |
|
| 90 |
if submit:
|
| 91 |
-
st.write("\nQUESTION :\"%s\"" % query_text)
|
| 92 |
|
| 93 |
answer = astra_vector_index.query(query_text, llm=llm).strip()
|
| 94 |
|
| 95 |
st.write("\nANSWER :\"%s\"" % answer)
|
| 96 |
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
|
| 107 |
if __name__ == "__main__":
|
| 108 |
main()
|
|
|
|
| 5 |
from langchain.vectorstores.cassandra import Cassandra
|
| 6 |
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
|
| 7 |
from langchain.embeddings import OpenAIEmbeddings
|
| 8 |
+
from typing_extensions import Concatenate
|
| 9 |
from datasets import load_dataset
|
| 10 |
|
| 11 |
import cassio
|
|
|
|
| 16 |
def main():
|
| 17 |
st.title("INTERACTION WITH PDF USING LLM")
|
| 18 |
|
|
|
|
| 19 |
pdf_file = st.file_uploader("Upload PDF file", type=["pdf"])
|
| 20 |
|
| 21 |
if pdf_file is not None:
|
| 22 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
ASTRA_DB_APPLICATION_TOKEN="AstraCS:KRrILGTZHQMczBfoJhucdxkN:a6aaf66c8f7e318f1048bb13ec9132510c3fefc85501a5268cd873edd418ad10"
|
| 24 |
ASTRA_DB_ID="800e9596-9d6a-487d-a87c-b95436d8026a"
|
| 25 |
OPENAI_API_KEY="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"
|
| 26 |
|
| 27 |
pdfreader=PdfReader(pdf_file)
|
| 28 |
+
|
|
|
|
| 29 |
raw_text=''
|
| 30 |
|
| 31 |
for i ,page in enumerate(pdfreader.pages):
|
| 32 |
content=page.extract_text()
|
| 33 |
if content:
|
| 34 |
raw_text += content
|
| 35 |
+
|
|
|
|
| 36 |
cassio.init(token=ASTRA_DB_APPLICATION_TOKEN,database_id=ASTRA_DB_ID)
|
| 37 |
|
| 38 |
llm=OpenAI(openai_api_key=OPENAI_API_KEY)
|
|
|
|
| 67 |
|
| 68 |
|
| 69 |
if submit:
|
|
|
|
| 70 |
|
| 71 |
answer = astra_vector_index.query(query_text, llm=llm).strip()
|
| 72 |
|
| 73 |
st.write("\nANSWER :\"%s\"" % answer)
|
| 74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
if __name__ == "__main__":
|
| 77 |
main()
|