Gary committed on
Commit
4f25114
·
1 Parent(s): 75b27ab

Initial commit

Browse files
Files changed (3) hide show
  1. app.py +24 -0
  2. indexer.py +32 -0
  3. requirements.txt +9 -0
app.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from indexer import index_text, answer_query


def rag_system(input_text, query):
    """Index *input_text* and answer *query* against the resulting vector store."""
    # Build the vector index from the raw text, then run retrieval + generation.
    store = index_text(input_text)
    return answer_query(query, store)


# Wire the two-input / one-output pipeline into a simple Gradio UI.
iface = gr.Interface(
    fn=rag_system,
    inputs=["text", "text"],
    outputs="text",
    title="RAG QA System",
    description="Enter a text and ask questions based on the input text.",
)

# Start the web app.
iface.launch()
indexer.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
3
+ from langchain_core.documents import Document
4
+ from langchain.embeddings import HuggingFaceEmbeddings
5
+ from langchain.vectorstores import FAISS
6
+
7
# Reuse a single embedding model across calls: constructing
# HuggingFaceEmbeddings() loads a sentence-transformers model, which is
# far too expensive to repeat on every request.
_EMBEDDINGS = None


def _get_embeddings():
    """Return a lazily-created, process-wide HuggingFaceEmbeddings instance."""
    global _EMBEDDINGS
    if _EMBEDDINGS is None:
        _EMBEDDINGS = HuggingFaceEmbeddings()
    return _EMBEDDINGS


# Function to index the input text
def index_text(input_text):
    """Split *input_text* into overlapping chunks and index them in FAISS.

    Args:
        input_text: Raw text to index. Must contain non-whitespace content.

    Returns:
        A FAISS vector store over the embedded chunks.

    Raises:
        ValueError: If *input_text* is empty or whitespace-only (FAISS
            cannot build an index from zero documents and would otherwise
            fail with a cryptic internal error).
    """
    if not input_text or not input_text.strip():
        raise ValueError("input_text must be a non-empty string")

    # Split the input text into chunks; the overlap preserves context
    # across chunk boundaries for retrieval.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_text(input_text)
    documents = [Document(page_content=chunk) for chunk in splits]

    # Embed the chunks and build the FAISS index.
    vectorstore = FAISS.from_documents(documents=documents, embedding=_get_embeddings())

    return vectorstore
20
+
21
# Cache the generation pipeline: pipeline() loads google/flan-t5-base from
# disk (or the network) each time it is called, which previously happened
# on every single query.
_GENERATOR = None


def _get_generator():
    """Return a lazily-created, process-wide text2text-generation pipeline."""
    global _GENERATOR
    if _GENERATOR is None:
        _GENERATOR = pipeline("text2text-generation", model="google/flan-t5-base")
    return _GENERATOR


def answer_query(query, vectorstore):
    """Answer *query* using documents retrieved from *vectorstore*.

    Args:
        query: Natural-language question.
        vectorstore: A vector store exposing ``.as_retriever()`` (e.g. the
            FAISS index produced by ``index_text``).

    Returns:
        The generated answer string from google/flan-t5-base.
    """
    # Retrieve the chunks most relevant to the query.
    retriever = vectorstore.as_retriever()
    search_results = retriever.get_relevant_documents(query)

    # Concatenate the retrieved chunks into one context passage.
    context = " ".join([doc.page_content for doc in search_results])

    prompt = f"Based on this text: {context}\nAnswer this question: {query}"
    generated_text = _get_generator()(prompt, max_length=100)

    return generated_text[0]['generated_text']
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ gradio==3.50.2
2
+ langchain==0.1.14
3
+ langchain_community==0.0.30
4
+ langchain-core==0.1.37
5
+ transformers==4.36.2
6
+ faiss-cpu==1.7.4
7
+ sentence-transformers==2.5.1
8
+ huggingface_hub>=0.20.0
9
+ pydantic<2.0