Commit : Update Templates/rag.html and App/routes.py to work with TheBloke/LLaMA-Pro-8B-Instruct-GGUF
Browse files- App/routes.py +76 -35
- Templates/rag.html +1 -14
App/routes.py
CHANGED
|
@@ -451,8 +451,8 @@ embeddings = HuggingFaceEmbeddings(
|
|
| 451 |
|
| 452 |
# Use a local GGUF model that supports text-generation
|
| 453 |
llm = CTransformers(
|
| 454 |
-
model="TheBloke/
|
| 455 |
-
model_file="
|
| 456 |
model_type="llama",
|
| 457 |
config={
|
| 458 |
'max_new_tokens': 512,
|
|
@@ -493,8 +493,46 @@ prompt = ChatPromptTemplate.from_template(
|
|
| 493 |
# ----------------------------
|
| 494 |
# 3. ROUTE
|
| 495 |
# ----------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 496 |
@main_bp.route('/rag', methods=['GET', 'POST'])
|
| 497 |
def rag_search():
|
|
|
|
|
|
|
|
|
|
|
|
|
| 498 |
query = ""
|
| 499 |
answer = ""
|
| 500 |
sources = []
|
|
@@ -503,54 +541,57 @@ def rag_search():
|
|
| 503 |
query = request.form.get('q', '').strip()
|
| 504 |
pdf_file = request.files.get('pdf_file')
|
| 505 |
|
|
|
|
| 506 |
if pdf_file and pdf_file.filename.endswith('.pdf'):
|
|
|
|
| 507 |
try:
|
| 508 |
-
# 1) Read & Split PDF
|
| 509 |
reader = PyPDF2.PdfReader(pdf_file)
|
| 510 |
full_text = "".join([p.extract_text() or "" for p in reader.pages])
|
| 511 |
-
|
| 512 |
-
# Improved splitting to keep context meaningful
|
| 513 |
-
splitter = RecursiveCharacterTextSplitter(
|
| 514 |
-
chunk_size=500,
|
| 515 |
-
chunk_overlap=50,
|
| 516 |
-
separators=["\n\n", "\n", ".", " "]
|
| 517 |
-
)
|
| 518 |
chunks = splitter.split_text(full_text)
|
| 519 |
|
| 520 |
-
#
|
|
|
|
| 521 |
vector_db = FAISS.from_texts(chunks, embeddings)
|
| 522 |
-
retriever
|
|
|
|
|
|
|
|
|
|
| 523 |
|
| 524 |
-
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
|
| 532 |
-
|
| 533 |
-
|
| 534 |
-
)
|
| 535 |
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 545 |
|
| 546 |
except Exception as e:
|
| 547 |
print("RAG Error:", e)
|
| 548 |
answer = f"System Error: {str(e)}"
|
| 549 |
-
else:
|
| 550 |
-
answer = "Please upload a valid PDF file."
|
| 551 |
-
|
| 552 |
-
return render_template("rag.html", query=query, answer=answer, sources=sources)
|
| 553 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 554 |
# ====== EXISTING DISCOVERY ROUTE (PRESERVED) ======
|
| 555 |
|
| 556 |
@main_bp.route('/discovery')
|
|
|
|
| 451 |
|
| 452 |
# Use a local GGUF model that supports text-generation
|
| 453 |
llm = CTransformers(
|
| 454 |
+
model="TheBloke/LLaMA-Pro-8B-Instruct-GGUF",
|
| 455 |
+
model_file="llama-pro-8b-instruct.Q2_K.gguf", ### llama-pro-8b-instruct.Q2_K.gguf
|
| 456 |
model_type="llama",
|
| 457 |
config={
|
| 458 |
'max_new_tokens': 512,
|
|
|
|
| 493 |
# ----------------------------
|
| 494 |
# 3. ROUTE
|
| 495 |
# ----------------------------
|
| 496 |
+
from flask import request, render_template, session # Import session
|
| 497 |
+
|
| 498 |
+
|
| 499 |
+
# ... (Keep your existing imports and LLM/Embeddings setup) ...
|
| 500 |
+
|
| 501 |
+
# ----------------------------
|
| 502 |
+
# 1. HELPER: Rephrase Question
|
| 503 |
+
# ----------------------------
|
| 504 |
+
def get_standalone_question(current_query, history):
    """
    Rephrase a follow-up question into a standalone, searchable query.

    If the user asks a follow-up (e.g., 'What are its results?'), this
    merges it with recent chat history so the retriever receives a
    self-contained question instead of an ambiguous fragment.

    Args:
        current_query: The raw user question for this turn.
        history: List of {"role": ..., "content": ...} message dicts
            (as stored in session['chat_history']).

    Returns:
        A standalone question string. Falls back to ``current_query``
        when there is no history, or when the model produces an
        empty/whitespace-only rephrase.
    """
    if not history:
        return current_query

    # Only the last 4 messages (~2 exchanges) to keep the context window small.
    hist_str = "\n".join(f"{m['role']}: {m['content']}" for m in history[-4:])

    rephrase_prompt = f"""<|system|>
Given the chat history and a follow-up question, rephrase the follow-up into a standalone question.
Do not answer it, just rephrase it.</s>
<|user|>
History: {hist_str}
Follow-up: {current_query}</s>
<|assistant|>"""

    raw_rephrased = llm.invoke(rephrase_prompt)
    rephrased = extract_answer(raw_rephrased)

    # Robustness fix: a small quantized model can return an empty string
    # here; searching the vector store with "" would silently degrade
    # retrieval, so fall back to the user's original wording instead.
    if not rephrased or not rephrased.strip():
        return current_query
    return rephrased.strip()
|
| 525 |
+
|
| 526 |
+
|
| 527 |
+
# ----------------------------
|
| 528 |
+
# 2. UPDATED ROUTE
|
| 529 |
+
# ----------------------------
|
| 530 |
@main_bp.route('/rag', methods=['GET', 'POST'])
def rag_search():
    """
    RAG endpoint: accepts a PDF upload and/or a question.

    POST with a PDF re-indexes the document into FAISS and resets the
    conversation; POST with a question rephrases it against session
    history, retrieves the top-k chunks, and generates an answer.

    Returns:
        Rendered ``rag.html`` with ``query``, ``answer``, ``sources``,
        and ``history`` in the template context.
    """
    # Bug fix: the retriever is intended to outlive a single request
    # (see the persistence note below), but without this declaration
    # `global_retriever = ...` created a request-local variable, so every
    # follow-up question raised NameError ("System Error: name
    # 'global_retriever' is not defined"). Declare it global so the
    # assignment actually persists for later requests.
    global global_retriever

    # Initialize chat history in session if not present
    if 'chat_history' not in session:
        session['chat_history'] = []

    query = ""
    answer = ""
    sources = []

    # NOTE(review): the diff elides this guard as unchanged context;
    # reconstructed here — confirm against the full file.
    if request.method == 'POST':
        query = request.form.get('q', '').strip()
        pdf_file = request.files.get('pdf_file')

        # If a NEW PDF is uploaded, clear the old conversation history.
        # Fix: extension check made case-insensitive so '.PDF' uploads work.
        if pdf_file and pdf_file.filename.lower().endswith('.pdf'):
            session['chat_history'] = []
            try:
                # 1) Read & split the PDF (extract_text() may return None
                #    for image-only pages, hence the `or ""`).
                reader = PyPDF2.PdfReader(pdf_file)
                full_text = "".join([p.extract_text() or "" for p in reader.pages])
                splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
                chunks = splitter.split_text(full_text)

                # IMPORTANT: For a real app, you'd want to save this vector_db
                # globally or to disk so you don't re-index on every follow-up question.
                vector_db = FAISS.from_texts(chunks, embeddings)
                # Persists across requests thanks to the `global` declaration above.
                global_retriever = vector_db.as_retriever(search_kwargs={"k": 3})
            except Exception as e:
                # Fix: pass the full template context (query/sources/history)
                # so rag.html renders consistently on the error path too.
                return render_template("rag.html",
                                       query=query,
                                       answer=f"Upload Error: {str(e)}",
                                       sources=sources,
                                       history=session.get('chat_history', []))

        if query:
            # Fix: give a clear message instead of a NameError-derived
            # "System Error" when no document has been indexed yet.
            if 'global_retriever' not in globals() or global_retriever is None:
                answer = "Please upload a PDF before asking a question."
            else:
                try:
                    # A) Rephrase the question using history
                    standalone_query = get_standalone_question(query, session['chat_history'])

                    # B) Retrieve docs using the rephrased query
                    docs = global_retriever.invoke(standalone_query)
                    sources = [doc.page_content for doc in docs]
                    context = format_docs(docs)

                    # C) Generate answer (note: the ORIGINAL query is shown to
                    #    the LLM; the rephrased one is only used for retrieval)
                    full_prompt = prompt.format(context=context, input=query)
                    raw_answer = llm.invoke(full_prompt)
                    answer = extract_answer(raw_answer)

                    # D) Update session history
                    updated_history = session['chat_history']
                    updated_history.append({"role": "user", "content": query})
                    updated_history.append({"role": "assistant", "content": answer})

                    # Keep only last 6 messages (3 turns) to stay within LLM limits
                    session['chat_history'] = updated_history[-6:]
                    session.modified = True

                except Exception as e:
                    print("RAG Error:", e)
                    answer = f"System Error: {str(e)}"

    return render_template("rag.html",
                           query=query,
                           answer=answer,
                           sources=sources,
                           history=session.get('chat_history', []))
|
| 595 |
# ====== EXISTING DISCOVERY ROUTE (PRESERVED) ======
|
| 596 |
|
| 597 |
@main_bp.route('/discovery')
|
Templates/rag.html
CHANGED
|
@@ -3,20 +3,7 @@
|
|
| 3 |
{% block content %}
|
| 4 |
<div class="min-h-screen bg-[#f8fafc] p-8 font-sans">
|
| 5 |
<div class="max-w-7xl mx-auto">
|
| 6 |
-
|
| 7 |
-
<div class="flex items-center gap-3">
|
| 8 |
-
<div class="bg-blue-600 p-2 rounded-lg">
|
| 9 |
-
<i class="fas fa-microscope text-white text-xl"></i>
|
| 10 |
-
</div>
|
| 11 |
-
<h1 class="text-2xl font-bold text-[#1e293b] tracking-tight">Qsearch Intelligence</h1>
|
| 12 |
-
</div>
|
| 13 |
-
<div class="flex gap-6 text-sm font-medium text-slate-500">
|
| 14 |
-
<span class="hover:text-blue-600 cursor-pointer flex items-center gap-2"><i class="fas fa-database"></i> Artifacts</span>
|
| 15 |
-
<span class="hover:text-blue-600 cursor-pointer flex items-center gap-2"><i class="fas fa-file-alt"></i> Publications</span>
|
| 16 |
-
<span class="hover:text-blue-600 cursor-pointer flex items-center gap-2"><i class="fas fa-graduation-cap"></i> Scholar</span>
|
| 17 |
-
<button class="bg-slate-100 px-4 py-2 rounded-md text-slate-700 hover:bg-slate-200 transition-all">Analysis Tools <i class="fas fa-chevron-down ml-1 text-xs"></i></button>
|
| 18 |
-
</div>
|
| 19 |
-
</div>
|
| 20 |
|
| 21 |
<div class="bg-white rounded-3xl shadow-sm border border-slate-200 p-2 mb-10">
|
| 22 |
<form action="/rag" method="POST" enctype="multipart/form-data" onsubmit="return showLoading()" class="flex items-center gap-2">
|
|
|
|
| 3 |
{% block content %}
|
| 4 |
<div class="min-h-screen bg-[#f8fafc] p-8 font-sans">
|
| 5 |
<div class="max-w-7xl mx-auto">
|
| 6 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
<div class="bg-white rounded-3xl shadow-sm border border-slate-200 p-2 mb-10">
|
| 9 |
<form action="/rag" method="POST" enctype="multipart/form-data" onsubmit="return showLoading()" class="flex items-center gap-2">
|