menikev committed on
Commit
2fed7ea
·
verified ·
1 Parent(s): e5a3c91

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -0
app.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from langchain_community.document_loaders import PyPDFLoader
3
+ from langchain_community.vectorstores import FAISS
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ from langchain.chains import RetrievalQA
6
+ from langchain_community.llms import HuggingFaceHub
7
+ from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
8
+
9
+ # You can use this section to suppress warnings generated by your code:
10
import warnings


def warn(*args, **kwargs):
    """No-op replacement for ``warnings.warn``: accepts any arguments, does nothing."""
    return None


# Silence warnings in two ways: swap out the function that emits them, and
# tell the filter machinery to ignore anything that still gets through.
warnings.warn = warn
warnings.filterwarnings('ignore')
15
+
16
# Hugging Face API token.
# For deployment (e.g. on Hugging Face), set HUGGINGFACEHUB_API_TOKEN as an
# environment variable / secret. The placeholder below is only a fallback:
# setdefault() never overwrites a token that is already present in the
# environment, whereas a plain assignment would clobber it.
import os
os.environ.setdefault("HUGGINGFACEHUB_API_TOKEN", "hf_YOUR_HUGGINGFACE_TOKEN")
20
+
21
+ ## LLM - Using an open-source model from Hugging Face
22
def get_llm():
    """
    Initialize and return the Hugging Face Hub LLM used for answering.

    The model is ``mistralai/Mixtral-8x7B-Instruct-v0.1`` with a low
    temperature (0.1) and a cap of 512 newly generated tokens.

    Returns:
        The configured ``HuggingFaceHub`` LLM wrapper.
    """
    repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
    llm = HuggingFaceHub(
        repo_id=repo_id,
        # The text-generation task limits output with ``max_new_tokens``.
        # ``max_length`` is not one of its parameters, so the intended
        # 512-token cap would not have applied.
        model_kwargs={"temperature": 0.1, "max_new_tokens": 512},
    )
    return llm
33
+
34
+ ## Document loader
35
def document_loader(file_path):
    """
    Read the PDF at ``file_path`` with ``PyPDFLoader`` and return whatever
    its ``load()`` call produces.
    """
    return PyPDFLoader(file_path).load()
42
+
43
+ ## Text splitter
44
def text_splitter(data):
    """
    Break the loaded document into smaller pieces for processing.

    Uses a ``RecursiveCharacterTextSplitter`` configured with
    ``chunk_size=1000``, ``chunk_overlap=200`` and ``len`` as the length
    function, and returns the result of ``split_documents(data)``.
    """
    splitter_options = {
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    return splitter.split_documents(data)
55
+
56
+ ## Vector db and Embedding model
57
def vector_database(chunks):
    """
    Build a FAISS vector store from ``chunks``.

    Embeddings come from ``HuggingFaceInferenceAPIEmbeddings`` using the
    ``sentence-transformers/all-MiniLM-L6-v2`` model, authenticated with the
    ``HUGGINGFACEHUB_API_TOKEN`` environment variable.
    """
    api_token = os.environ["HUGGINGFACEHUB_API_TOKEN"]
    embeddings = HuggingFaceInferenceAPIEmbeddings(
        api_key=api_token,
        model_name="sentence-transformers/all-MiniLM-L6-v2",
    )
    return FAISS.from_documents(chunks, embeddings)
69
+
70
+ ## Retriever
71
def retriever(file_path):
    """
    Turn the document at ``file_path`` into a retriever.

    Pipeline: load the document, split it into chunks, index the chunks in
    a vector store, then expose that store via ``as_retriever()``.
    """
    documents = document_loader(file_path)
    chunks = text_splitter(documents)
    return vector_database(chunks).as_retriever()
80
+
81
+ ## QA Chain
82
def retriever_qa(file, query):
    """
    Answer a question about an uploaded PDF with a RetrievalQA chain.

    A retriever is built from the document on every call. The user's
    question alone is used as the retrieval query, and the retrieved text is
    inserted into a legal-advisor prompt before it is sent to the LLM.

    Args:
        file: Path to the uploaded PDF. May be ``None``/empty when nothing
            has been uploaded yet.
        query: The user's question.

    Returns:
        str: The LLM's answer, or a short message asking for the missing
        input when no file or no question was provided.
    """
    # Guard against missing inputs instead of failing deep inside the loader.
    if not file:
        return "Please upload a PDF file first."
    if not query or not query.strip():
        return "Please enter a question."

    # Imported locally because the file's top-level imports do not include it.
    from langchain.prompts import PromptTemplate

    llm = get_llm()
    retriever_obj = retriever(file)

    # Custom prompt to act as a conversational legal advisor.
    # ``{context}`` is filled by the "stuff" chain with the retrieved chunks
    # and ``{question}`` with the user's query. The previous version put the
    # file path in the "Context" slot and sent the whole prompt to the
    # retriever as the search query.
    prompt = PromptTemplate(
        template=(
            "You are a friendly and professional legal advisor. Your goal is "
            "to provide concise and contextual legal advice based on the "
            "provided document.\n"
            "Do not give verbatim answers. Instead, analyze the relevant "
            "text and respond in a conversational manner.\n"
            "\n"
            "Context:\n"
            "{context}\n"
            "\n"
            "Question: {question}\n"
            "\n"
            "Legal Advisor's Answer:\n"
        ),
        input_variables=["context", "question"],
    )

    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever_obj,
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt},
    )

    # Only the question is used for retrieval; the prompt wraps it afterwards.
    response = qa.invoke({"query": query})

    # The chain also returns source documents; only the answer text is shown.
    return response['result']
115
+
116
+ # Create Gradio interface
117
# Input widgets: a single PDF upload (passed to the handler as a file path)
# and a two-line question box.
_pdf_upload = gr.File(
    label="Upload PDF File",
    file_count="single",
    file_types=['.pdf'],
    type="filepath",
)
_question_box = gr.Textbox(
    label="Input Query",
    lines=2,
    placeholder="Type your question here...",
)

# Output widget: the advisor's answer as plain text.
_answer_box = gr.Textbox(label="Legal Advisor's Response")

# Assemble the Gradio interface around retriever_qa, with flagging disabled.
rag_application = gr.Interface(
    fn=retriever_qa,
    inputs=[_pdf_upload, _question_box],
    outputs=_answer_box,
    title="Nigerian Constitution Legal Advisor Chatbot",
    description="Upload the Nigerian Constitution and ask me questions about it. I will provide a conversational and contextual response.",
    allow_flagging="never",
)

# Start the app with a public share link.
rag_application.launch(share=True)