Huzaifa424 committed on
Commit
96071c0
·
verified ·
1 Parent(s): d8c184b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -16
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import torch
2
  import gradio as gr
3
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
4
  from langchain.embeddings import HuggingFaceEmbeddings
5
  from langchain.vectorstores import FAISS
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -9,8 +9,9 @@ from langchain.document_loaders import PyPDFLoader
9
  # Model and Tokenizer
10
  MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
11
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
12
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
13
- model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto")
14
 
15
  # Load embedding model for RAG
16
  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
@@ -19,18 +20,24 @@ vector_store = None
19
  # Function to process PDF and create vector database
20
  def process_pdf(pdf_path):
21
  global vector_store
 
 
 
22
  loader = PyPDFLoader(pdf_path)
23
  documents = loader.load()
24
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
25
  texts = text_splitter.split_documents(documents)
26
 
 
 
 
27
  vector_store = FAISS.from_documents(texts, embedding_model)
28
- return "PDF successfully processed and indexed."
29
 
30
  # RAG Query Function
31
- def query_rag(message, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p, history=None):
32
  if vector_store is None:
33
- return "Please upload and process a PDF first."
34
 
35
  # Retrieve relevant chunks
36
  docs = vector_store.similarity_search(message, k=3)
@@ -53,7 +60,8 @@ def query_rag(message, system_prompt, temperature, max_new_tokens, top_k, repeti
53
  max_new_tokens=max_new_tokens,
54
  top_k=top_k,
55
  repetition_penalty=repetition_penalty,
56
- top_p=top_p
 
57
  )
58
  response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
59
  return response
@@ -61,19 +69,23 @@ def query_rag(message, system_prompt, temperature, max_new_tokens, top_k, repeti
61
  # Gradio Interface
62
  def launch_interface():
63
  with gr.Blocks() as demo:
64
- gr.Markdown("## πŸ€– RAG Chatbot with DeepSeek")
65
- pdf_uploader = gr.File(label="Upload PDF", type="filepath")
66
- process_btn = gr.Button("Process PDF")
 
 
 
67
  process_output = gr.Textbox(label="Processing Status", interactive=False)
68
 
69
- chatbot = gr.ChatInterface(query_rag,
 
70
  additional_inputs=[
71
- gr.Textbox("You are a helpful assistant.", label="System Prompt"),
72
- gr.Slider(0, 1, 0.6, label="Temperature"),
73
- gr.Slider(0, 32000, 10000, label="Max new tokens"),
74
- gr.Slider(1, 80, 40, label="Top K"),
75
- gr.Slider(0.1, 2.0, 1.1, label="Repetition Penalty"),
76
- gr.Slider(0, 1, 0.95, label="Top P"),
77
  ]
78
  )
79
 
 
1
  import torch
2
  import gradio as gr
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
  from langchain.embeddings import HuggingFaceEmbeddings
5
  from langchain.vectorstores import FAISS
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
 
9
  # Model and Tokenizer
10
  MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
11
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
12
+
13
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
14
+ model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.float16, device_map="auto")
15
 
16
  # Load embedding model for RAG
17
  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 
20
  # Function to process PDF and create vector database
21
def process_pdf(pdf_path):
    """Load a PDF, split it into chunks, and index them in a FAISS vector store.

    Args:
        pdf_path: Filesystem path to the uploaded PDF (supplied by the Gradio
            File component; ``None``/empty when nothing was uploaded).

    Returns:
        A human-readable status string describing success or the failure reason.

    Side effects:
        Rebinds the module-level ``vector_store`` global on success.
    """
    global vector_store

    # Guard: the Gradio File input yields None when no file was uploaded.
    if not pdf_path:
        return "❌ No PDF uploaded. Please upload a valid file."

    loader = PyPDFLoader(pdf_path)
    documents = loader.load()

    # Overlapping chunks preserve sentence context across chunk boundaries.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    texts = text_splitter.split_documents(documents)

    # Scanned/image-only PDFs can yield zero extractable text chunks.
    if not texts:
        return "❌ No text extracted from the PDF."

    vector_store = FAISS.from_documents(texts, embedding_model)
    return "✅ PDF successfully processed and indexed."
36
 
37
  # RAG Query Function
38
+ def query_rag(message, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p, history=[]):
39
  if vector_store is None:
40
+ return "⚠️ Please upload and process a PDF first."
41
 
42
  # Retrieve relevant chunks
43
  docs = vector_store.similarity_search(message, k=3)
 
60
  max_new_tokens=max_new_tokens,
61
  top_k=top_k,
62
  repetition_penalty=repetition_penalty,
63
+ top_p=top_p,
64
+ pad_token_id=tokenizer.eos_token_id # Ensures correct padding
65
  )
66
  response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
67
  return response
 
69
  # Gradio Interface
70
  def launch_interface():
71
  with gr.Blocks() as demo:
72
+ gr.Markdown("## πŸ€– RAG Chatbot with DeepSeek AI")
73
+
74
+ with gr.Row():
75
+ pdf_uploader = gr.File(label="πŸ“‚ Upload PDF", type="filepath")
76
+ process_btn = gr.Button("πŸ“Œ Process PDF")
77
+
78
  process_output = gr.Textbox(label="Processing Status", interactive=False)
79
 
80
+ chatbot = gr.ChatInterface(
81
+ fn=query_rag,
82
  additional_inputs=[
83
+ gr.Textbox("You are a helpful assistant.", label="πŸ”Ή System Prompt"),
84
+ gr.Slider(0.1, 1.0, 0.6, label="🌑️ Temperature"),
85
+ gr.Slider(100, 32000, 1024, step=100, label="πŸ“ Max new tokens"),
86
+ gr.Slider(1, 80, 40, step=1, label="🎯 Top K"),
87
+ gr.Slider(0.1, 2.0, 1.1, step=0.1, label="πŸ”„ Repetition Penalty"),
88
+ gr.Slider(0.1, 1.0, 0.95, step=0.05, label="πŸ”’ Top P"),
89
  ]
90
  )
91