|
|
|
|
|
|
|
|
import os |
|
|
import gradio as gr |
|
|
|
|
|
from langchain.vectorstores import FAISS |
|
|
from langchain.embeddings import HuggingFaceEmbeddings |
|
|
from langchain.document_loaders import TextLoader |
|
|
from langchain_text_splitters import RecursiveCharacterTextSplitter |
|
|
from langchain.chains import RetrievalQA |
|
|
from langchain.llms import HuggingFacePipeline |
|
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Sentence-embedding model used to vectorize document chunks for the FAISS
# index. "all-MiniLM-L6-v2" is a small, fast SentenceTransformers model.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


# Lightweight seq2seq model used as the answering LLM. Downloaded from the
# Hugging Face Hub on first run, then served locally via a pipeline.
model_name = "google/flan-t5-small"


tokenizer = AutoTokenizer.from_pretrained(model_name)


model = AutoModelForSeq2SeqLM.from_pretrained(model_name)


# text2text-generation pipeline; max_length caps the generated answer at
# 512 tokens.
pipe = pipeline("text2text-generation", model=model, tokenizer=tokenizer, max_length=512)


# LangChain wrapper so the Hugging Face pipeline can be used as an LLM
# inside a RetrievalQA chain.
llm = HuggingFacePipeline(pipeline=pipe)
|
|
|
|
|
def process_file(file_path):
    """Build a RetrievalQA chain over the text file at *file_path*.

    Loads the file, splits it into overlapping chunks, indexes the chunks
    in an in-memory FAISS store using the module-level embedding model,
    and wires a "stuff"-type RetrievalQA chain around the module-level LLM.
    """
    # Load the raw text and cut it into ~1000-char chunks that overlap by
    # 200 chars so an answer spanning a chunk boundary is still retrievable.
    raw_docs = TextLoader(file_path).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(raw_docs)

    # Index the chunks and expose them through a retriever interface.
    store = FAISS.from_documents(chunks, embedding_model)

    # "stuff" = concatenate all retrieved chunks into one prompt for the LLM.
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=store.as_retriever(),
    )
|
|
|
|
|
|
|
|
# Global QA chain; populated once a document has been processed.
qa_chain = None


def upload_and_prepare(file):
    """Gradio handler: build the QA chain for the uploaded file.

    With ``gr.File(type="filepath")`` Gradio passes the temp-file path as a
    plain ``str``; older Gradio versions pass a file-like object exposing a
    ``.name`` attribute, so both are accepted.

    Returns a status message string for the UI.
    """
    global qa_chain
    # Robustness: clicking "Process" with no file selected passes None.
    if file is None:
        return "❌ Please upload a .txt file first."
    # BUG FIX: with type="filepath" the handler receives a str, which has no
    # .name attribute — resolve the path defensively for both Gradio APIs.
    path = file if isinstance(file, str) else file.name
    qa_chain = process_file(path)
    # BUG FIX: the original return string literal was split across two
    # physical lines (a SyntaxError); restored as a single literal with the
    # intended emoji (original bytes were mojibake).
    return "✅ Document processed. You can now ask questions!"
|
|
|
|
|
def ask_question(query):
    """Gradio handler: answer *query* using the prepared QA chain.

    Returns the chain's answer text, or a prompt to upload a document first
    when no chain has been built yet.
    """
    if not qa_chain:
        # BUG FIX: user-facing message contained a mojibake character ("β");
        # restored to the intended emoji.
        return "❌ Please upload a document first."
    response = qa_chain.invoke({"query": query})
    return response["result"]
|
|
|
|
|
|
|
|
# ---- Gradio UI -------------------------------------------------------------
# Two rows: (1) file upload + process button, (2) question box + ask button,
# with a status textbox and an answer textbox for the handlers' outputs.
with gr.Blocks() as demo:
    # NOTE(review): all emoji below were mojibake in the original source
    # ("π…"/"β"); restored to plausible intended glyphs.
    gr.Markdown("## 🧠 Ask Questions About Your Document (LangChain + Hugging Face)")

    with gr.Row():
        # type="filepath" makes Gradio pass the temp-file path as a str.
        file_input = gr.File(label="📄 Upload .txt File", type="filepath")
        upload_btn = gr.Button("🔄 Process Document")

    upload_output = gr.Textbox(label="📋 Status", interactive=False)

    with gr.Row():
        query_input = gr.Textbox(label="❓ Your Question")
        query_btn = gr.Button("🧠 Get Answer")

    # BUG FIX: this label's string literal was split across two physical
    # lines in the original (a SyntaxError); restored as one literal.
    answer_output = gr.Textbox(label="✅ Answer", lines=4)

    # Wire handlers: upload builds the global QA chain; query asks it.
    upload_btn.click(upload_and_prepare, inputs=file_input, outputs=upload_output)
    query_btn.click(ask_question, inputs=query_input, outputs=answer_output)
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Start the Gradio web server (blocking call; serves the UI above).
    demo.launch()
|
|
|
|
|
|
|
|
|