rairo commited on
Commit
988c7cc
·
1 Parent(s): 0a85ee1

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +96 -0
main.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import io
3
+ from flask import Flask, request
4
+ from twilio.twiml.messaging_response import MessagingResponse
5
+ from langchain.llms import GooglePalm
6
+ import pandas as pd
7
+ #from yolopandas import pd
8
+ import os
9
+ from langchain.embeddings import GooglePalmEmbeddings
10
+ # a class to create a question answering system based on information retrieval
11
+ from langchain.chains import RetrievalQA
12
+ # a class for splitting text into fixed-sized chunks with an optional overlay
13
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
14
+ # a class to create a vector index using FAISS, a library for approximate nearest neighbor search
15
+ from langchain.vectorstores import FAISS
16
+ # a class for loading PDF documents from a directory
17
+ from langchain.document_loaders import PyPDFDirectoryLoader
18
+ from langchain.chains.question_answering import load_qa_chain
19
+ from langchain.chains import ConversationalRetrievalChain
20
+ from langchain.schema.vectorstore import VectorStoreRetriever
21
+
22
+ from dotenv import load_dotenv
23
+
24
+ load_dotenv()
25
+
26
+
27
+
28
+
29
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every PDF in *pdf_docs*.

    NOTE(review): `PdfReader` is never imported in this file (it would come
    from pypdf/PyPDF2), so calling this function raises NameError as written.
    The function also appears unused — PDF loading below goes through
    PyPDFDirectoryLoader instead. Confirm whether this helper should be
    removed or its import restored.
    """
    text=""
    for pdf in pdf_docs:
        # assumes each item is a path or file-like object PdfReader accepts — TODO confirm
        pdf_reader= PdfReader(pdf)
        for page in pdf_reader.pages:
            text+= page.extract_text()
    return text
36
+
37
# ---- Module-level retrieval setup (runs once, at import time) ----
# Side effects: reads every PDF under documents/ and calls the Google PaLM
# embeddings API. Requires the PALM environment variable (loaded from .env
# by the load_dotenv() call above); raises KeyError if it is missing.

# load PDF files from a directory
loader = PyPDFDirectoryLoader("documents/")
data = loader.load()

# split the extracted documents into chunks; sizes are in characters
text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=20)

text_chunks = text_splitter.split_documents(data)

embeddings = GooglePalmEmbeddings(google_api_key=os.environ['PALM'])

# create embeddings for each text chunk and build a FAISS vector index for
# approximate-nearest-neighbor search; `vector_store` is read by ask_pdfs()
vector_store = FAISS.from_documents(text_chunks, embedding=embeddings)
58
+
59
+
60
+
61
def ask_pdfs(user_question):
    """Answer *user_question* from the indexed PDF documents.

    Builds a RetrievalQA chain over the module-level FAISS ``vector_store``
    with Google PaLM as the LLM, runs the question through it, and returns
    the chain's answer.
    """
    # Load environment variables from .env (also done at module import).
    load_dotenv()

    palm_llm = GooglePalm(temperature=0, google_api_key=os.environ['PALM'])

    # Retriever that pulls the most relevant chunks from the FAISS index,
    # wired into a question-answering chain.
    doc_retriever = VectorStoreRetriever(vectorstore=vector_store)
    qa_chain = RetrievalQA.from_llm(llm=palm_llm, retriever=doc_retriever)

    return qa_chain.run(user_question)
75
+
76
app = Flask(__name__)


@app.route("/bot", methods=["POST"])
def whatsapp():
    """Twilio WhatsApp webhook: answer the incoming message from the PDFs.

    Reads the message body from the POST form, runs it through ask_pdfs(),
    and returns the answer wrapped in TwiML so Twilio relays it to the user.
    """
    # Incoming message text; lowercased to normalize the query.
    user_msg = request.values.get('Body', '').lower()

    answer = ask_pdfs(user_msg)

    # BUG FIX: the original constructed a MessagingResponse, then immediately
    # overwrote it with the raw answer string and returned that. A Twilio
    # webhook must return TwiML, so wrap the answer in a <Message> element.
    twiml = MessagingResponse()
    twiml.message(str(answer))
    return str(twiml)
93
+
94
+
95
# Start the Flask development server when this file is executed directly
# (not when imported); note the module-level PDF indexing above runs first.
if __name__ == "__main__":
    app.run()