manabb committed on
Commit
3ef7736
Β·
verified Β·
1 Parent(s): 11953d2

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +88 -0
  2. requirements.txt +14 -0
app.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# app.py — RAG demo: upload a .txt file, build a FAISS index over it, and
# answer questions with a local seq2seq model via LangChain's RetrievalQA.
import os

import gradio as gr
from langchain.chains import RetrievalQA
# NOTE: with the pinned langchain==0.3.x the legacy `langchain.vectorstores`,
# `langchain.embeddings`, `langchain.document_loaders` and `langchain.llms`
# re-export shims no longer exist (removed in the 0.2 split); these classes
# live in langchain_community, which is already in requirements.txt.
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# Optional: Set HF Token if needed
# os.environ['HUGGINGFACEHUB_API_TOKEN'] = 'hf_XXXX'

# Embedding model used to vectorize document chunks (small, CPU-friendly).
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Lightweight generator model so the app runs on CPU (e.g. a free HF Space).
model_name = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Wrap the transformers pipeline so LangChain can drive it as an LLM.
pipe = pipeline("text2text-generation", model=model, tokenizer=tokenizer, max_length=512)
llm = HuggingFacePipeline(pipeline=pipe)
def process_file(file_path):
    """Build a RetrievalQA chain over the text file at *file_path*.

    Loads the file, splits it into overlapping chunks, embeds the chunks
    into an in-memory FAISS index, and wires a "stuff"-type RetrievalQA
    chain on top of the resulting retriever.
    """
    # Load the raw document and chop it into retriever-sized pieces;
    # the 200-char overlap keeps context across chunk boundaries.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(TextLoader(file_path).load())

    # Index the chunks so passages similar to a query can be retrieved.
    index = FAISS.from_documents(chunks, embedding_model)

    # "stuff" = concatenate all retrieved chunks directly into the prompt.
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=index.as_retriever(),
    )
# The active QA chain, shared across Gradio event handlers.
# None until a document has been uploaded and processed.
qa_chain = None

def upload_and_prepare(file):
    """Gradio handler: index the uploaded document and report status.

    With ``gr.File(type="filepath")`` Gradio passes the selected file as a
    plain ``str`` path, so the original ``file.name`` raised AttributeError;
    accept both the string payload and the legacy file-object payload.
    """
    global qa_chain
    # Guard: the button can be clicked before any file is selected.
    if file is None:
        return "❌ Please choose a .txt file first."
    path = file if isinstance(file, str) else file.name
    qa_chain = process_file(path)
    return "✅ Document processed. You can now ask questions!"

def ask_question(query):
    """Gradio handler: answer *query* against the indexed document."""
    if not qa_chain:
        # No document has been processed yet — inform instead of crashing.
        return "❌ Please upload a document first."
    response = qa_chain.invoke({"query": query})
    return response["result"]
# ---- Gradio UI ----------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Ask Questions About Your Document (LangChain + Hugging Face)")

    # Row 1: pick a file and build the vector index from it.
    with gr.Row():
        file_box = gr.File(label="📄 Upload .txt File", type="filepath")
        process_btn = gr.Button("🔄 Process Document")

    status_box = gr.Textbox(label="📝 Status", interactive=False)

    # Row 2: ask questions against the indexed document.
    with gr.Row():
        question_box = gr.Textbox(label="❓ Your Question")
        answer_btn = gr.Button("🧠 Get Answer")

    answer_box = gr.Textbox(label="✅ Answer", lines=4)

    # Wire the buttons to their handlers.
    process_btn.click(upload_and_prepare, inputs=file_box, outputs=status_box)
    answer_btn.click(ask_question, inputs=question_box, outputs=answer_box)

# Guarded launch: HF Spaces imports the module and serves `demo` itself;
# the guard lets local `python app.py` runs start the server directly.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ gradio==5.36.2
3
+
4
+ transformers==4.53.3
5
+
6
+ sentence-transformers==3.0.1
7
+
8
+ langchain==0.3.27
9
+
10
+ faiss-cpu==1.8.0
11
+
12
+ langchain-community==0.3.27
13
+
14
+ numpy<2