AymenELKani committed on
Commit
ef248ab
·
verified ·
1 Parent(s): 071665b

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +70 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from langchain import HuggingFacePipeline
3
+ from transformers import pipeline
4
+ from langchain.document_loaders import PyPDFLoader
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ from langchain_community.vectorstores import Chroma
7
+ from langchain.chains import RetrievalQA
8
+ from langchain.embeddings import HuggingFaceEmbeddings
9
+
10
# --- Load free Hugging Face LLM ---
# falcon-7b-instruct is a free, open-weights instruct model; swap the model id
# for any other text-generation checkpoint if you like.
hf_pipeline = pipeline(
    "text-generation",
    model="tiiuae/falcon-7b-instruct",
    max_new_tokens=256,
    # BUGFIX: transformers ignores `temperature` under the default greedy
    # decoding; sampling must be enabled for it to take effect.
    do_sample=True,
    temperature=0.5
)
# Wrap the raw transformers pipeline so LangChain chains can call it as an LLM.
llm = HuggingFacePipeline(pipeline=hf_pipeline)
19
+
20
# --- PDF Loader ---
def document_loader(file):
    """Load an uploaded PDF into a list of LangChain Documents (one per page).

    Args:
        file: Either a filepath string (Gradio >= 4 passes a str for gr.File
              by default) or a file-like object exposing ``.name`` (older
              Gradio versions / tempfile wrappers).

    Returns:
        The list of Document objects produced by PyPDFLoader.
    """
    # BUGFIX: gr.File delivers a plain filepath string by default in Gradio 4+,
    # which has no .name attribute — support both shapes.
    path = file if isinstance(file, str) else file.name
    loader = PyPDFLoader(path)
    return loader.load()
24
+
25
# --- Text Splitter ---
def text_splitter(documents):
    """Split loaded documents into overlapping chunks suitable for embedding.

    Args:
        documents: list of LangChain Document objects.

    Returns:
        list of Document chunks, each at most 1000 characters, with a
        20-character overlap between consecutive chunks to preserve context.
    """
    chunker = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
    return chunker.split_documents(documents)
32
+
33
# --- Vector Database using embeddings ---
def vector_database(chunks):
    """Embed document chunks and index them in an in-memory Chroma store.

    Args:
        chunks: list of Document chunks to embed.

    Returns:
        A Chroma vector store built from the chunks, using the small
        all-MiniLM-L6-v2 sentence-transformer for embeddings.
    """
    embedding_fn = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return Chroma.from_documents(chunks, embedding_fn)
38
+
39
# --- Build retriever ---
def retriever(file):
    """Build a similarity retriever over the uploaded PDF.

    Loads the PDF, splits it into chunks, embeds them into a Chroma store,
    and returns that store's retriever interface.
    """
    documents = document_loader(file)
    pieces = text_splitter(documents)
    return vector_database(pieces).as_retriever()
44
+
45
# --- RetrievalQA ---
def retriever_qa(file, query):
    """Answer `query` with retrieval-augmented QA over the uploaded PDF.

    NOTE(review): the vector index is rebuilt from scratch on every call,
    so each question re-embeds the entire document.

    Args:
        file: the uploaded PDF (as delivered by gr.File).
        query: the user's question string.

    Returns:
        The chain's answer string.
    """
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever(file),
        return_source_documents=False,
    )
    return chain.run(query)
55
+
56
# --- Gradio Interface ---
# Build the input/output components first, then wire them into the Interface.
pdf_input = gr.File(label="Upload PDF", file_types=['.pdf'])
question_input = gr.Textbox(
    label="Ask a question about the PDF",
    lines=2,
    placeholder="Type your question here...",
)
answer_output = gr.Textbox(label="Answer")

app = gr.Interface(
    fn=retriever_qa,
    inputs=[pdf_input, question_input],
    outputs=answer_output,
    title="Free LLM PDF Q&A Bot",
    description="Upload a PDF document and ask any question. This bot uses a free open-source LLM and vector search to answer your questions.",
)

if __name__ == "__main__":
    app.launch()
70
+
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
transformers
torch
langchain
langchain-community
pypdf
chromadb
sentence-transformers
gradio