flyfir248 committed on
Commit
8befb33
·
1 Parent(s): 2f7d8ba

Commit: Updated rag.html and routes.py to work with TheBloke/LLaMA-Pro-8B-Instruct-GGUF

Browse files
Files changed (2) hide show
  1. App/routes.py +76 -35
  2. Templates/rag.html +1 -14
App/routes.py CHANGED
@@ -451,8 +451,8 @@ embeddings = HuggingFaceEmbeddings(
451
 
452
  # Use a local GGUF model that supports text-generation
453
  llm = CTransformers(
454
- model="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
455
- model_file="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
456
  model_type="llama",
457
  config={
458
  'max_new_tokens': 512,
@@ -493,8 +493,46 @@ prompt = ChatPromptTemplate.from_template(
493
  # ----------------------------
494
  # 3. ROUTE
495
  # ----------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
496
  @main_bp.route('/rag', methods=['GET', 'POST'])
497
  def rag_search():
 
 
 
 
498
  query = ""
499
  answer = ""
500
  sources = []
@@ -503,54 +541,57 @@ def rag_search():
503
  query = request.form.get('q', '').strip()
504
  pdf_file = request.files.get('pdf_file')
505
 
 
506
  if pdf_file and pdf_file.filename.endswith('.pdf'):
 
507
  try:
508
- # 1) Read & Split PDF
509
  reader = PyPDF2.PdfReader(pdf_file)
510
  full_text = "".join([p.extract_text() or "" for p in reader.pages])
511
-
512
- # Improved splitting to keep context meaningful
513
- splitter = RecursiveCharacterTextSplitter(
514
- chunk_size=500,
515
- chunk_overlap=50,
516
- separators=["\n\n", "\n", ".", " "]
517
- )
518
  chunks = splitter.split_text(full_text)
519
 
520
- # 2) Build Local Vector DB
 
521
  vector_db = FAISS.from_texts(chunks, embeddings)
522
- retriever = vector_db.as_retriever(search_kwargs={"k": 3})
 
 
 
523
 
524
- # 3) Build MODERN LCEL Chain with Output Cleaning
525
- rag_chain = (
526
- {
527
- "context": retriever | format_docs,
528
- "input": RunnablePassthrough()
529
- }
530
- | prompt
531
- | llm
532
- | StrOutputParser()
533
- | extract_answer # Cleans the TinyLlama prompt tags
534
- )
535
 
536
- if query:
537
- # 4) Execute Chain
538
- docs = retriever.invoke(query)
539
- sources = [doc.page_content for doc in docs]
540
 
541
- # This now returns only the cleaned string
542
- answer = rag_chain.invoke(query)
543
- else:
544
- answer = "PDF uploaded successfully. What is your question?"
 
 
 
 
545
 
546
  except Exception as e:
547
  print("RAG Error:", e)
548
  answer = f"System Error: {str(e)}"
549
- else:
550
- answer = "Please upload a valid PDF file."
551
-
552
- return render_template("rag.html", query=query, answer=answer, sources=sources)
553
 
 
 
 
 
 
554
  # ====== EXISTING DISCOVERY ROUTE (PRESERVED) ======
555
 
556
  @main_bp.route('/discovery')
 
451
 
452
  # Use a local GGUF model that supports text-generation
453
  llm = CTransformers(
454
+ model="TheBloke/LLaMA-Pro-8B-Instruct-GGUF",
455
+ model_file="llama-pro-8b-instruct.Q2_K.gguf", ### llama-pro-8b-instruct.Q2_K.gguf
456
  model_type="llama",
457
  config={
458
  'max_new_tokens': 512,
 
493
  # ----------------------------
494
  # 3. ROUTE
495
  # ----------------------------
496
+ from flask import request, render_template, session # Import session
497
+
498
+
499
+ # ... (Keep your existing imports and LLM/Embeddings setup) ...
500
+
501
+ # ----------------------------
502
+ # 1. HELPER: Rephrase Question
503
+ # ----------------------------
504
def get_standalone_question(current_query, history):
    """Collapse a follow-up question into a self-contained one.

    Uses the recent chat ``history`` so that references like
    "What are its results?" resolve to something the retriever can
    actually search for. Returns ``current_query`` unchanged when
    there is no history to draw on.
    """
    if not history:
        return current_query

    # Only the last four messages (two user/assistant exchanges) go
    # into the prompt, keeping the rephrasing context small.
    recent = history[-4:]
    transcript = "\n".join(f"{msg['role']}: {msg['content']}" for msg in recent)

    rephrase_prompt = f"""<|system|>
Given the chat history and a follow-up question, rephrase the follow-up into a standalone question.
Do not answer it, just rephrase it.</s>
<|user|>
History: {transcript}
Follow-up: {current_query}</s>
<|assistant|>"""

    # extract_answer strips the model's prompt/formatting tags.
    return extract_answer(llm.invoke(rephrase_prompt))
525
+
526
+
527
+ # ----------------------------
528
+ # 2. UPDATED ROUTE
529
+ # ----------------------------
530
  @main_bp.route('/rag', methods=['GET', 'POST'])
531
  def rag_search():
532
+ # Initialize chat history in session if not present
533
+ if 'chat_history' not in session:
534
+ session['chat_history'] = []
535
+
536
  query = ""
537
  answer = ""
538
  sources = []
 
541
  query = request.form.get('q', '').strip()
542
  pdf_file = request.files.get('pdf_file')
543
 
544
+ # If a NEW PDF is uploaded, clear the old conversation history
545
  if pdf_file and pdf_file.filename.endswith('.pdf'):
546
+ session['chat_history'] = []
547
  try:
 
548
  reader = PyPDF2.PdfReader(pdf_file)
549
  full_text = "".join([p.extract_text() or "" for p in reader.pages])
550
+ splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
 
 
 
 
 
 
551
  chunks = splitter.split_text(full_text)
552
 
553
+ # IMPORTANT: For a real app, you'd want to save this vector_db
554
+ # globally or to disk so you don't re-index on every follow-up question.
555
  vector_db = FAISS.from_texts(chunks, embeddings)
556
+ # NOTE(review): despite its name, global_retriever below is a plain
+ # local — it is discarded when this request ends, so follow-up
+ # questions without a re-upload cannot reach it; persist the index
+ # (module-level, app config, or on disk) instead.
557
+ global_retriever = vector_db.as_retriever(search_kwargs={"k": 3})
558
+ except Exception as e:
559
+ return render_template("rag.html", answer=f"Upload Error: {str(e)}")
560
 
561
+ if query:
562
+ try:
563
+ # A) Rephrase the question using history
564
+ standalone_query = get_standalone_question(query, session['chat_history'])
565
+
566
+ # B) Retrieve docs using the rephrased query
567
+ # (NOTE(review): global_retriever is only a local in the upload
+ # branch above — on any POST without a fresh PDF this name is
+ # undefined and the next line raises NameError.)
568
+ docs = global_retriever.invoke(standalone_query)
569
+ sources = [doc.page_content for doc in docs]
570
+ context = format_docs(docs)
 
571
 
572
+ # C) Generate Answer
573
+ full_prompt = prompt.format(context=context, input=query)
574
+ raw_answer = llm.invoke(full_prompt)
575
+ answer = extract_answer(raw_answer)
576
 
577
+ # D) Update Session History
578
+ updated_history = session['chat_history']
579
+ updated_history.append({"role": "user", "content": query})
580
+ updated_history.append({"role": "assistant", "content": answer})
581
+
582
+ # Keep only last 6 messages (3 turns) to stay within LLM limits
583
+ session['chat_history'] = updated_history[-6:]
584
+ session.modified = True
585
 
586
  except Exception as e:
587
  print("RAG Error:", e)
588
  answer = f"System Error: {str(e)}"
 
 
 
 
589
 
590
+ return render_template("rag.html",
591
+ query=query,
592
+ answer=answer,
593
+ sources=sources,
594
+ history=session.get('chat_history', []))
595
  # ====== EXISTING DISCOVERY ROUTE (PRESERVED) ======
596
 
597
  @main_bp.route('/discovery')
Templates/rag.html CHANGED
@@ -3,20 +3,7 @@
3
  {% block content %}
4
  <div class="min-h-screen bg-[#f8fafc] p-8 font-sans">
5
  <div class="max-w-7xl mx-auto">
6
- <div class="mb-10 flex items-center justify-between">
7
- <div class="flex items-center gap-3">
8
- <div class="bg-blue-600 p-2 rounded-lg">
9
- <i class="fas fa-microscope text-white text-xl"></i>
10
- </div>
11
- <h1 class="text-2xl font-bold text-[#1e293b] tracking-tight">Qsearch Intelligence</h1>
12
- </div>
13
- <div class="flex gap-6 text-sm font-medium text-slate-500">
14
- <span class="hover:text-blue-600 cursor-pointer flex items-center gap-2"><i class="fas fa-database"></i> Artifacts</span>
15
- <span class="hover:text-blue-600 cursor-pointer flex items-center gap-2"><i class="fas fa-file-alt"></i> Publications</span>
16
- <span class="hover:text-blue-600 cursor-pointer flex items-center gap-2"><i class="fas fa-graduation-cap"></i> Scholar</span>
17
- <button class="bg-slate-100 px-4 py-2 rounded-md text-slate-700 hover:bg-slate-200 transition-all">Analysis Tools <i class="fas fa-chevron-down ml-1 text-xs"></i></button>
18
- </div>
19
- </div>
20
 
21
  <div class="bg-white rounded-3xl shadow-sm border border-slate-200 p-2 mb-10">
22
  <form action="/rag" method="POST" enctype="multipart/form-data" onsubmit="return showLoading()" class="flex items-center gap-2">
 
3
  {% block content %}
4
  <div class="min-h-screen bg-[#f8fafc] p-8 font-sans">
5
  <div class="max-w-7xl mx-auto">
6
+
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  <div class="bg-white rounded-3xl shadow-sm border border-slate-200 p-2 mb-10">
9
  <form action="/rag" method="POST" enctype="multipart/form-data" onsubmit="return showLoading()" class="flex items-center gap-2">