Solab committed on
Commit
ec330f9
·
1 Parent(s): 4fbfc9f

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -0
app.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import standard library modules
2
+ import os
3
+
4
+ # Import third-party modules
5
+ from dotenv import load_dotenv, find_dotenv
6
+ import google.generativeai as palm
7
+ import PyPDF2 # Import PyPDF2 library for reading PDF files
8
+
9
+ # Import local modules
10
+ from langchain import PromptTemplate, LLMChain
11
+ from langchain.chains import RetrievalQA
12
+ from langchain.chains.question_answering import load_qa_chain
13
+ from langchain.document_loaders import UnstructuredPDFLoader, UnstructuredURLLoader
14
+ from langchain.embeddings import GooglePalmEmbeddings
15
+ from langchain.indexes import VectorstoreIndexCreator
16
+ from langchain.llms import GooglePalm
17
+ from langchain.text_splitter import CharacterTextSplitter
18
+
19
+ # Import gradio module
20
+ import gradio as gr
21
+
22
# Load environment variables (e.g. GOOGLE_API_KEY) from a .env file, if one exists.
load_dotenv(find_dotenv())

# Configure the Google PaLM client with the API key.
# NOTE(review): os.environ[...] raises a bare KeyError when GOOGLE_API_KEY is
# unset — consider failing with a clearer startup message.
api_key = os.environ["GOOGLE_API_KEY"]
palm.configure(api_key=api_key)

# LangChain wrapper around the Google PaLM LLM; low temperature for
# mostly-deterministic answers.
llm = GooglePalm()
llm.temperature = 0.1

# Diagnostic only: count the PaLM models that support the "generateText" method.
models = [
    m for m in palm.list_models() if "generateText" in m.supported_generation_methods
]
print(f"There are {len(models)} model(s) available.")

# Index factory used later to embed and chunk uploaded documents:
# PaLM embeddings + character splitter (1000-char chunks, no overlap).
index_creator = VectorstoreIndexCreator(
    embedding=GooglePalmEmbeddings(),
    text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=0),
)
44
+
45
# Extract the full text content of a PDF file.
def pdf_to_text(file_obj):
    """Return the concatenated text of every page in a PDF file.

    Args:
        file_obj: An object with a ``.name`` attribute holding the path to a
            PDF file on disk (e.g. the file object Gradio passes in).

    Returns:
        str: The extracted text of all pages, joined in page order.
    """
    # "with" guarantees the handle is closed even if extraction raises,
    # unlike the previous explicit open()/close() pair.
    with open(file_obj.name, "rb") as pdf_file:
        # PdfReader / .pages / .extract_text() replace the deprecated
        # PdfFileReader / .numPages / .getPage() / .extractText() API,
        # which was removed in PyPDF2 3.0.
        reader = PyPDF2.PdfReader(pdf_file)
        # extract_text() may return None for pages without a text layer.
        page_texts = (page.extract_text() or "" for page in reader.pages)
        # join() avoids quadratic string concatenation across many pages.
        return "".join(page_texts)
62
+
63
# Answer a question about an uploaded PDF using retrieval-augmented QA.
def answer_question(question, pdf_file):
    """Answer a natural-language question about an uploaded PDF.

    Args:
        question: The user's question as a plain string.
        pdf_file: A file object (with a ``.name`` path attribute) pointing at
            the uploaded PDF.

    Returns:
        str: The answer produced by the RetrievalQA chain.
    """
    # BUG FIX: UnstructuredPDFLoader expects a *file path*, not raw text.
    # The previous code ran pdf_to_text() and passed the extracted string in
    # as if it were a path, which fails for any real document. Loading from
    # the path directly also removes the redundant extraction pass.
    pdf_loader = UnstructuredPDFLoader(pdf_file.name)
    # Build an in-memory vector index over the document's chunks.
    pdf_index = index_creator.from_loaders([pdf_loader])
    # "stuff" chain type: all retrieved chunks are placed into one prompt.
    pdf_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=pdf_index.vectorstore.as_retriever(),
        input_key="question",
    )
    # Run the chain and return its answer text.
    return pdf_chain.run(question)
80
+
81
# Prompt template: a fixed assistant persona followed by the user's question.
# (Runtime string left exactly as authored, including capitalization.)
template = """
You are an artificial intelligence assistant working for Raising The village. You are asked to answer questions. The assistant gives helpful, detailed, and polite answers to the user's questions.

{question}

"""

# PromptTemplate with a single input slot for the question text.
prompt = PromptTemplate(template=template, input_variables=["question"])

# NOTE(review): llm_chain is constructed here (verbose=True logs prompts) but
# is never referenced again — answer_question() uses RetrievalQA instead.
# Confirm whether this chain and the template above can be removed.
llm_chain = LLMChain(prompt=prompt, llm=llm, verbose=True)
94
+
95
# Gradio UI: a question textbox plus a PDF upload, wired to answer_question.
# FIX: gr.inputs.File belongs to the deprecated pre-3.x component namespace,
# which has been removed in current Gradio releases; gr.File is the
# supported replacement and accepts the same file_types filter.
interface = gr.Interface(
    fn=answer_question,
    inputs=["text", gr.File(file_types=['.pdf'])],  # question + PDF upload
    outputs="text",
    title="AI Assistant",
    description="Ask me anything about Raising The Village"
)

# share=True additionally exposes a temporary public URL beyond localhost.
interface.launch(share=True)