Huzaifa424 commited on
Commit
b7d6ae5
·
verified ·
1 Parent(s): 8cab7d7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -0
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import gradio as gr
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
4
+ from langchain.embeddings import HuggingFaceEmbeddings
5
+ from langchain.vectorstores import FAISS
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain.document_loaders import PyPDFLoader
8
+
9
# Model and Tokenizer
# NOTE(review): loading happens at import time — the 7B model download/placement
# runs before the UI starts. device_map="auto" lets accelerate decide placement,
# so `device` below is only used for moving input tensors, not the model itself.
MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto")

# Load embedding model for RAG
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Global FAISS index; None until process_pdf() has indexed a document.
vector_store = None
18
+
19
# Function to process PDF and create vector database
def process_pdf(pdf_path):
    """Load a PDF, split it into overlapping chunks, and build the global FAISS index.

    Args:
        pdf_path: Filesystem path to the PDF, or a file-like object exposing a
            ``.name`` attribute (gradio's File component may supply either,
            depending on its ``type`` setting). May be None if nothing was uploaded.

    Returns:
        A human-readable status string for the UI.
    """
    global vector_store
    # Guard: clicking "Process PDF" with no file uploaded passes None.
    if pdf_path is None:
        return "No PDF provided. Please upload a file first."
    # gradio may hand us a tempfile wrapper instead of a plain path string;
    # unwrap it so PyPDFLoader always receives a path.
    path = getattr(pdf_path, "name", pdf_path)
    loader = PyPDFLoader(path)
    documents = loader.load()
    # chunk_overlap keeps sentence context intact across split boundaries.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    texts = text_splitter.split_documents(documents)

    vector_store = FAISS.from_documents(texts, embedding_model)
    return "PDF successfully processed and indexed."
29
+
30
# RAG Query Function
def query_rag(message, history=None, system_prompt="You are a helpful assistant.",
              temperature=0.6, max_new_tokens=10000, top_k=40,
              repetition_penalty=1.1, top_p=0.95):
    """Answer `message` using chunks retrieved from the indexed PDF.

    gr.ChatInterface invokes its fn as ``fn(message, history, *additional_inputs)``.
    The original signature omitted ``history``, so the chat history was silently
    bound to ``system_prompt`` and every slider value shifted one slot to the
    right. ``history`` is accepted (and currently unused) to restore correct
    argument alignment; defaults keep the function callable stand-alone.

    Args:
        message: The user's question.
        history: Chat history supplied by gr.ChatInterface (unused).
        system_prompt: System instruction prepended to the prompt.
        temperature, max_new_tokens, top_k, repetition_penalty, top_p:
            Sampling parameters forwarded to ``model.generate``.

    Returns:
        The assistant's generated reply (prompt excluded), or an instruction
        to upload a PDF when no index exists yet.
    """
    if vector_store is None:
        return "Please upload and process a PDF first."

    # Retrieve relevant chunks
    docs = vector_store.similarity_search(message, k=3)
    context = "\n".join([doc.page_content for doc in docs])

    # Construct prompt (ChatML-style markers used by the distilled Qwen base)
    instruction = f"<|im_start|>system\n{system_prompt}\n<|im_end|>\n"
    instruction += f"Relevant context:\n{context}\n"
    instruction += f"<|im_start|>user\n{message}\n<|im_end|>\n<|im_start|>assistant\n"

    # Tokenization
    enc = tokenizer(instruction, return_tensors="pt", padding=True, truncation=True)
    input_ids = enc.input_ids.to(device)
    # Pass the attention mask explicitly — omitting it triggers transformers'
    # "attention mask not set" warning and can misbehave with padding.
    attention_mask = enc.attention_mask.to(device)

    # Generate response
    output_ids = model.generate(
        input_ids,
        attention_mask=attention_mask,
        do_sample=True,
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        top_p=top_p
    )
    # Decode only the newly generated tokens: output_ids[0] begins with the
    # prompt, so decoding it in full would echo the system prompt, retrieved
    # context, and user message back into the chat.
    new_tokens = output_ids[0][input_ids.shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return response
60
+
61
# Gradio Interface
def launch_interface():
    """Build and launch the gradio UI: a PDF upload/index panel plus a RAG chat.

    Side effects: starts the gradio server (blocking) via ``demo.launch()``.
    """
    with gr.Blocks() as demo:
        gr.Markdown("## 🤖 RAG Chatbot with DeepSeek")
        # type="filepath" hands process_pdf a plain path string; the legacy
        # type="file" value was removed in gradio 4.x and would raise here.
        pdf_uploader = gr.File(label="Upload PDF", type="filepath")
        process_btn = gr.Button("Process PDF")
        process_output = gr.Textbox(label="Processing Status", interactive=False)

        # ChatInterface calls fn(message, history, *additional_inputs).
        chatbot = gr.ChatInterface(
            query_rag,
            additional_inputs=[
                gr.Textbox("You are a helpful assistant.", label="System Prompt"),
                gr.Slider(0, 1, 0.6, label="Temperature"),
                gr.Slider(0, 32000, 10000, label="Max new tokens"),
                gr.Slider(1, 80, 40, label="Top K"),
                gr.Slider(0, 2, 1.1, label="Repetition Penalty"),
                gr.Slider(0, 1, 0.95, label="Top P"),
            ]
        )

        process_btn.click(process_pdf, inputs=[pdf_uploader], outputs=[process_output])

    demo.launch()

if __name__ == "__main__":
    launch_interface()