# app.py
"""Gradio app: upload a .txt document and ask questions about it.

Pipeline: TextLoader -> RecursiveCharacterTextSplitter -> FAISS vector store
(MiniLM embeddings) -> RetrievalQA chain backed by a small local
flan-t5 text2text pipeline (CPU-friendly).
"""

import os

import gradio as gr
# NOTE(review): these `langchain.*` import paths are deprecated in recent
# LangChain releases (moved to `langchain_community.*` / `langchain_huggingface`).
# Kept as-is to avoid adding a new dependency; migrate when upgrading.
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# Optional: Set HF Token if needed
# os.environ['HUGGINGFACEHUB_API_TOKEN'] = 'hf_XXXX'

# Embedding model used to vectorize document chunks.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Load HF model (lightweight for CPU).
model_name = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Wrap the seq2seq model in a transformers pipeline, then adapt for LangChain.
pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=512,
)
llm = HuggingFacePipeline(pipeline=pipe)


def process_file(file_path):
    """Build a RetrievalQA chain from a plain-text file.

    Args:
        file_path: Path to a .txt file on disk.

    Returns:
        A RetrievalQA chain whose retriever is a FAISS index over
        1000-char chunks (200-char overlap) of the document.
    """
    # Load & split the document into overlapping chunks so retrieval
    # can surface mid-document passages.
    loader = TextLoader(file_path)
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    docs = text_splitter.split_documents(documents)

    # Create the vector DB and expose it as a retriever.
    vector_db = FAISS.from_documents(docs, embedding_model)
    retriever = vector_db.as_retriever()

    # "stuff" chain type: concatenate retrieved chunks into one prompt.
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
    )
    return qa_chain


# The QA chain is stored globally so it survives across UI events.
qa_chain = None


def upload_and_prepare(file):
    """Gradio callback: index the uploaded file and report status.

    Args:
        file: With ``gr.File(type="filepath")`` Gradio passes a plain
            ``str`` path; older configurations pass a tempfile-like
            object with a ``.name`` attribute. Both are accepted.

    Returns:
        A status string for the UI.
    """
    global qa_chain
    # BUG FIX: the original called `file.name`, which raises
    # AttributeError because type="filepath" yields a str. Accept both
    # shapes, and guard against no file being selected.
    if file is None:
        return "❌ Please upload a document first."
    file_path = file if isinstance(file, str) else file.name
    qa_chain = process_file(file_path)
    return "✅ Document processed. You can now ask questions!"


def ask_question(query):
    """Gradio callback: answer *query* using the prepared QA chain."""
    if not qa_chain:
        return "❌ Please upload a document first."
    response = qa_chain.invoke({"query": query})
    return response["result"]


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Ask Questions About Your Document (LangChain + Hugging Face)")
    with gr.Row():
        file_input = gr.File(label="📄 Upload .txt File", type="filepath")
        upload_btn = gr.Button("🔄 Process Document")
    upload_output = gr.Textbox(label="📁 Status", interactive=False)
    with gr.Row():
        query_input = gr.Textbox(label="❓ Your Question")
        query_btn = gr.Button("🧠 Get Answer")
    answer_output = gr.Textbox(label="✅ Answer", lines=4)

    upload_btn.click(upload_and_prepare, inputs=file_input, outputs=upload_output)
    query_btn.click(ask_question, inputs=query_input, outputs=answer_output)

# For HF Spaces / local dev.
if __name__ == "__main__":
    demo.launch()