Saran03 committed on
Commit
b27cc0d
·
verified ·
1 Parent(s): 3f95580

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +142 -0
app.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from langchain_community.vectorstores import Chroma
4
+ from langchain_community.chat_models import ChatOllama
5
+ from langchain_community.embeddings import FastEmbedEmbeddings
6
+ from langchain_community.document_loaders import PyPDFLoader
7
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
8
+ from langchain.schema.output_parser import StrOutputParser
9
+ from langchain.prompts import PromptTemplate
10
+ from langchain.schema.runnable import RunnablePassthrough
11
+ from sklearn.metrics.pairwise import cosine_similarity
12
+ import numpy as np
13
+
14
# --- Mutable application state -------------------------------------------

# Vector store holding the indexed document chunks; None until created.
vector_store = None

# Conversation so far, one (question, answer) entry per exchange.
chat_history = []

# Embeddings of the questions asked so far.
question_embeddings = []

# --- Embedding model ------------------------------------------------------

embeddings_model = FastEmbedEmbeddings(model_name="BAAI/bge-small-en-v1.5")

# --- Prompt templates -----------------------------------------------------

# Template with a {context} slot in addition to the {question} slot.
prompt_with_context_template = """Analyze the following context and answer the question based only on the following context:
{context}

Question: {question}
"""

# Template with only a {question} slot.
prompt_without_context_template = """Provide an answer to the question based on general knowledge.
Question: {question}
"""

prompt_with_context = PromptTemplate.from_template(prompt_with_context_template)
prompt_without_context = PromptTemplate.from_template(prompt_without_context_template)
35
+
36
+ # Function to load, split PDFs, and store in vector store
37
def process_documents(uploaded_files):
    """Load uploaded PDFs, split them into chunks and index them.

    Args:
        uploaded_files: Paths of the uploaded PDF files. May be ``None`` or
            empty when the user pressed the button without selecting files.

    Returns:
        A human-readable status message.
    """
    global vector_store

    # Nothing selected: return early instead of iterating over None or
    # handing an empty document list to the vector store.
    if not uploaded_files:
        return "No files uploaded."

    # The splitter holds no per-file state, so build it once for all files.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

    all_docs = []
    for uploaded_file in uploaded_files:
        # load() returns the pages unsplit; chunking is done exactly once,
        # by the splitter configured above.
        pages = PyPDFLoader(uploaded_file).load()
        all_docs.extend(text_splitter.split_documents(pages))

    # PDFs without extractable text yield no chunks; there is nothing to index.
    if not all_docs:
        return f"Uploaded {len(uploaded_files)} files but found no text to index."

    # Create the vector store on first use, otherwise add to the existing one.
    if vector_store is None:
        vector_store = Chroma.from_documents(all_docs, embeddings_model)
    else:
        vector_store.add_documents(all_docs)

    return f"Uploaded {len(uploaded_files)} files and indexed {len(all_docs)} chunks."
57
+
58
+ # Function to handle question answering with RAG and maintain chat history
59
def _format_docs(docs):
    """Join the text of retrieved documents into one context string."""
    return "\n\n".join(doc.page_content for doc in docs)


def _format_chat_history(history):
    """Render (question, answer) pairs as the text shown in the history box."""
    return "\n\n".join(f"{q}\n{a}" for q, a in history)


def answer_question(question):
    """Answer a question, using indexed documents as context when available.

    The exchange is appended to the global chat history, and the most similar
    *previously asked* question is reported when its cosine similarity to the
    current question exceeds 0.5.

    Args:
        question: The user's question text.

    Returns:
        A tuple ``(answer, chat_display, related_question)`` of strings.
    """
    global vector_store, chat_history, question_embeddings

    no_related = "No related questions found."

    # Do not send an empty prompt to the model or record an empty exchange.
    if question is None or not question.strip():
        return "Please enter a question.", _format_chat_history(chat_history), no_related

    llm = ChatOllama(model="llama3:latest", verbose=True)

    if vector_store is not None:
        # RAG chain: retrieved documents are flattened to plain text so the
        # prompt receives their content rather than a list of objects.
        chain = (
            {
                "context": vector_store.as_retriever() | _format_docs,
                "question": RunnablePassthrough(),
            }
            | prompt_with_context
            | llm
            | StrOutputParser()
        )
    else:
        # No documents indexed: answer from general knowledge.
        chain = (
            {"question": RunnablePassthrough()}
            | prompt_without_context
            | llm
            | StrOutputParser()
        )

    # The model output is returned as-is; str.capitalize() would lowercase
    # everything after the first character.
    answer = chain.invoke(question)

    current_question_embedding = embeddings_model.embed_query(question)

    # Search only the *earlier* questions. The current embedding is stored
    # afterwards, otherwise the best match would always be the question itself.
    related_question = no_related
    if question_embeddings:
        similarities = cosine_similarity(
            [current_question_embedding], question_embeddings
        )[0]
        related_idx = int(np.argmax(similarities))
        if similarities[related_idx] > 0.5:
            related_question = chat_history[related_idx][0]

    # Append to both lists together so their indices stay aligned.
    chat_history.append((f"Q: {question}", f"A: {answer}"))
    question_embeddings.append(current_question_embedding)

    return answer, _format_chat_history(chat_history), related_question
107
+
108
+ # Function to clear the vector store
109
def clear_documents():
    """Delete the indexed document collection and reset the vector store.

    The chat history itself is left untouched; it is returned as display text
    so the history box receives a string rather than a raw list of tuples.

    Returns:
        A tuple ``(status, chat_display, related_question)`` of strings.
    """
    global vector_store
    if vector_store is not None:
        vector_store.delete_collection()
        vector_store = None

    # Same "Q/A" text layout used when answering a question.
    chat_display = "\n\n".join(f"{q}\n{a}" for q, a in chat_history)
    return "Document collection cleared.", chat_display, ""
115
+
116
+ # Gradio interface
117
with gr.Blocks() as demo:
    with gr.Row():
        # Narrow column: document management controls and the question box.
        with gr.Column(scale=1):
            file_uploader = gr.File(
                label="Upload PDFs",
                file_types=[".pdf"],
                file_count="multiple",
                type="filepath",
            )
            upload_button = gr.Button("Upload and Process")
            clear_button = gr.Button("Clear Document Collection")
            status_display = gr.Textbox(label="Status", lines=2)

            question_input = gr.Textbox(label="Ask a question about the documents")
            ask_button = gr.Button("Ask")

        # Wide column: answer, running chat history and related question.
        with gr.Column(scale=2):
            answer_display = gr.Textbox(label="Answer", lines=4)
            chat_history_display = gr.Textbox(
                label="Chat History",
                lines=10,
                interactive=False,
            )
            related_question_display = gr.Textbox(
                label="Related Question",
                lines=4,
                interactive=False,
            )

    # Event wiring: each button calls one handler and fills its output boxes.
    upload_button.click(
        process_documents,
        inputs=[file_uploader],
        outputs=[status_display],
    )
    ask_button.click(
        answer_question,
        inputs=[question_input],
        outputs=[answer_display, chat_history_display, related_question_display],
    )
    clear_button.click(
        clear_documents,
        outputs=[status_display, chat_history_display, related_question_display],
    )

# Start the web app.
demo.launch(inline=False)