Huzaifa424 committed on
Commit
b16ae29
·
verified ·
1 Parent(s): 96071c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -31
app.py CHANGED
@@ -11,7 +11,11 @@ MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
11
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
12
 
13
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
14
- model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.float16, device_map="auto")
 
 
 
 
15
 
16
  # Load embedding model for RAG
17
  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
@@ -20,24 +24,18 @@ vector_store = None
20
  # Function to process PDF and create vector database
21
  def process_pdf(pdf_path):
22
  global vector_store
23
- if not pdf_path:
24
- return "❌ No PDF uploaded. Please upload a valid file."
25
-
26
  loader = PyPDFLoader(pdf_path)
27
  documents = loader.load()
28
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
29
  texts = text_splitter.split_documents(documents)
30
 
31
- if not texts:
32
- return "❌ No text extracted from the PDF."
33
-
34
  vector_store = FAISS.from_documents(texts, embedding_model)
35
- return "✅ PDF successfully processed and indexed."
36
 
37
  # RAG Query Function
38
- def query_rag(message, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p, history=[]):
39
  if vector_store is None:
40
- return "⚠️ Please upload and process a PDF first."
41
 
42
  # Retrieve relevant chunks
43
  docs = vector_store.similarity_search(message, k=3)
@@ -51,17 +49,18 @@ def query_rag(message, system_prompt, temperature, max_new_tokens, top_k, repeti
51
  # Tokenization
52
  enc = tokenizer(instruction, return_tensors="pt", padding=True, truncation=True)
53
  input_ids = enc.input_ids.to(device)
54
-
 
55
  # Generate response
56
  output_ids = model.generate(
57
  input_ids,
 
58
  do_sample=True,
59
- temperature=temperature,
60
- max_new_tokens=max_new_tokens,
61
- top_k=top_k,
62
- repetition_penalty=repetition_penalty,
63
- top_p=top_p,
64
- pad_token_id=tokenizer.eos_token_id # Ensures correct padding
65
  )
66
  response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
67
  return response
@@ -69,29 +68,26 @@ def query_rag(message, system_prompt, temperature, max_new_tokens, top_k, repeti
69
  # Gradio Interface
70
  def launch_interface():
71
  with gr.Blocks() as demo:
72
- gr.Markdown("## 🤖 RAG Chatbot with DeepSeek AI")
73
-
74
- with gr.Row():
75
- pdf_uploader = gr.File(label="📂 Upload PDF", type="filepath")
76
- process_btn = gr.Button("📌 Process PDF")
77
-
78
  process_output = gr.Textbox(label="Processing Status", interactive=False)
79
 
80
  chatbot = gr.ChatInterface(
81
- fn=query_rag,
82
  additional_inputs=[
83
- gr.Textbox("You are a helpful assistant.", label="🔹 System Prompt"),
84
- gr.Slider(0.1, 1.0, 0.6, label="🌑️ Temperature"),
85
- gr.Slider(100, 32000, 1024, step=100, label="📏 Max new tokens"),
86
- gr.Slider(1, 80, 40, step=1, label="🎯 Top K"),
87
- gr.Slider(0.1, 2.0, 1.1, step=0.1, label="🔄 Repetition Penalty"),
88
- gr.Slider(0.1, 1.0, 0.95, step=0.05, label="🔢 Top P"),
89
  ]
90
  )
91
 
92
  process_btn.click(process_pdf, inputs=[pdf_uploader], outputs=[process_output])
93
 
94
- demo.launch()
95
 
96
  if __name__ == "__main__":
97
  launch_interface()
 
11
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
12
 
13
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
14
+ model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto")
15
+
16
+ # Ensure PAD token is set correctly
17
+ if tokenizer.pad_token is None:
18
+ tokenizer.pad_token = tokenizer.eos_token
19
 
20
  # Load embedding model for RAG
21
  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 
24
  # Function to process PDF and create vector database
25
  def process_pdf(pdf_path):
26
  global vector_store
 
 
 
27
  loader = PyPDFLoader(pdf_path)
28
  documents = loader.load()
29
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
30
  texts = text_splitter.split_documents(documents)
31
 
 
 
 
32
  vector_store = FAISS.from_documents(texts, embedding_model)
33
+ return "PDF successfully processed and indexed."
34
 
35
  # RAG Query Function
36
+ def query_rag(message, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p, history=None):
37
  if vector_store is None:
38
+ return "Please upload and process a PDF first."
39
 
40
  # Retrieve relevant chunks
41
  docs = vector_store.similarity_search(message, k=3)
 
49
  # Tokenization
50
  enc = tokenizer(instruction, return_tensors="pt", padding=True, truncation=True)
51
  input_ids = enc.input_ids.to(device)
52
+ attention_mask = enc.attention_mask.to(device)
53
+
54
  # Generate response
55
  output_ids = model.generate(
56
  input_ids,
57
+ attention_mask=attention_mask, # Fix for attention mask issue
58
  do_sample=True,
59
+ temperature=float(temperature),
60
+ max_new_tokens=int(max_new_tokens),
61
+ top_k=int(top_k),
62
+ repetition_penalty=float(repetition_penalty), # Fix: Ensure it's a float
63
+ top_p=float(top_p)
 
64
  )
65
  response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
66
  return response
 
68
  # Gradio Interface
69
  def launch_interface():
70
  with gr.Blocks() as demo:
71
+ gr.Markdown("## 🤖 RAG Chatbot with DeepSeek")
72
+ pdf_uploader = gr.File(label="Upload PDF", type="filepath")
73
+ process_btn = gr.Button("Process PDF")
 
 
 
74
  process_output = gr.Textbox(label="Processing Status", interactive=False)
75
 
76
  chatbot = gr.ChatInterface(
77
+ query_rag,
78
  additional_inputs=[
79
+ gr.Textbox("You are a helpful assistant.", label="System Prompt"),
80
+ gr.Slider(0.1, 1, 0.6, label="Temperature"), # Fix: Start from 0.1
81
+ gr.Slider(1, 32000, 10000, label="Max new tokens"),
82
+ gr.Slider(1, 50, 40, label="Top K"), # Adjusted range
83
+ gr.Slider(1.0, 2.0, 1.1, label="Repetition Penalty"), # Fix: Should be 1.0-2.0
84
+ gr.Slider(0.1, 1, 0.95, label="Top P"), # Fix: Should be 0.1-1
85
  ]
86
  )
87
 
88
  process_btn.click(process_pdf, inputs=[pdf_uploader], outputs=[process_output])
89
 
90
+ demo.launch(share=True) # Enable public link
91
 
92
  if __name__ == "__main__":
93
  launch_interface()