flyfir248 committed on
Commit
8befb33
·
1 Parent(s): 2f7d8ba

Commit: Updated rag.html and routes.py to work with TheBloke/LLaMA-Pro-8B-Instruct-GGUF

Browse files
Files changed (2) hide show
  1. App/routes.py +76 -35
  2. Templates/rag.html +1 -14
App/routes.py CHANGED
@@ -451,8 +451,8 @@ embeddings = HuggingFaceEmbeddings(
451
 
452
  # Use a local GGUF model that supports text-generation
453
  llm = CTransformers(
454
- model="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
455
- model_file="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
456
  model_type="llama",
457
  config={
458
  'max_new_tokens': 512,
@@ -493,8 +493,46 @@ prompt = ChatPromptTemplate.from_template(
493
  # ----------------------------
494
  # 3. ROUTE
495
  # ----------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
496
  @main_bp.route('/rag', methods=['GET', 'POST'])
497
  def rag_search():
 
 
 
 
498
  query = ""
499
  answer = ""
500
  sources = []
@@ -503,54 +541,57 @@ def rag_search():
503
  query = request.form.get('q', '').strip()
504
  pdf_file = request.files.get('pdf_file')
505
 
 
506
  if pdf_file and pdf_file.filename.endswith('.pdf'):
 
507
  try:
508
- # 1) Read & Split PDF
509
  reader = PyPDF2.PdfReader(pdf_file)
510
  full_text = "".join([p.extract_text() or "" for p in reader.pages])
511
-
512
- # Improved splitting to keep context meaningful
513
- splitter = RecursiveCharacterTextSplitter(
514
- chunk_size=500,
515
- chunk_overlap=50,
516
- separators=["\n\n", "\n", ".", " "]
517
- )
518
  chunks = splitter.split_text(full_text)
519
 
520
- # 2) Build Local Vector DB
 
521
  vector_db = FAISS.from_texts(chunks, embeddings)
522
- retriever = vector_db.as_retriever(search_kwargs={"k": 3})
 
 
 
523
 
524
- # 3) Build MODERN LCEL Chain with Output Cleaning
525
- rag_chain = (
526
- {
527
- "context": retriever | format_docs,
528
- "input": RunnablePassthrough()
529
- }
530
- | prompt
531
- | llm
532
- | StrOutputParser()
533
- | extract_answer # Cleans the TinyLlama prompt tags
534
- )
535
 
536
- if query:
537
- # 4) Execute Chain
538
- docs = retriever.invoke(query)
539
- sources = [doc.page_content for doc in docs]
540
 
541
- # This now returns only the cleaned string
542
- answer = rag_chain.invoke(query)
543
- else:
544
- answer = "PDF uploaded successfully. What is your question?"
 
 
 
 
545
 
546
  except Exception as e:
547
  print("RAG Error:", e)
548
  answer = f"System Error: {str(e)}"
549
- else:
550
- answer = "Please upload a valid PDF file."
551
-
552
- return render_template("rag.html", query=query, answer=answer, sources=sources)
553
 
 
 
 
 
 
554
  # ====== EXISTING DISCOVERY ROUTE (PRESERVED) ======
555
 
556
  @main_bp.route('/discovery')
 
451
 
452
  # Use a local GGUF model that supports text-generation
453
  llm = CTransformers(
454
+ model="TheBloke/LLaMA-Pro-8B-Instruct-GGUF",
455
+ model_file="llama-pro-8b-instruct.Q2_K.gguf", ### llama-pro-8b-instruct.Q2_K.gguf
456
  model_type="llama",
457
  config={
458
  'max_new_tokens': 512,
 
493
  # ----------------------------
494
  # 3. ROUTE
495
  # ----------------------------
496
+ from flask import request, render_template, session # Import session
497
+
498
+
499
+ # ... (Keep your existing imports and LLM/Embeddings setup) ...
500
+
501
+ # ----------------------------
502
+ # 1. HELPER: Rephrase Question
503
+ # ----------------------------
504
def get_standalone_question(current_query, history):
    """Collapse a follow-up question into a self-contained one.

    Uses the recent chat ``history`` so that references like
    "What are its results?" resolve to something the retriever can
    actually search for. Returns ``current_query`` unchanged when
    there is no history to draw on.
    """
    if not history:
        return current_query

    # Only the last four messages (two user/assistant exchanges) go
    # into the prompt, keeping the rephrasing context small.
    recent = history[-4:]
    transcript = "\n".join(f"{msg['role']}: {msg['content']}" for msg in recent)

    rephrase_prompt = f"""<|system|>
Given the chat history and a follow-up question, rephrase the follow-up into a standalone question.
Do not answer it, just rephrase it.</s>
<|user|>
History: {transcript}
Follow-up: {current_query}</s>
<|assistant|>"""

    # extract_answer strips the model's prompt/formatting tags.
    return extract_answer(llm.invoke(rephrase_prompt))
525
+
526
+
527
+ # ----------------------------
528
+ # 2. UPDATED ROUTE
529
+ # ----------------------------
530
  @main_bp.route('/rag', methods=['GET', 'POST'])
531
  def rag_search():
532
+ # Initialize chat history in session if not present
533
+ if 'chat_history' not in session:
534
+ session['chat_history'] = []
535
+
536
  query = ""
537
  answer = ""
538
  sources = []
 
541
  query = request.form.get('q', '').strip()
542
  pdf_file = request.files.get('pdf_file')
543
 
544
+ # If a NEW PDF is uploaded, clear the old conversation history
545
  if pdf_file and pdf_file.filename.endswith('.pdf'):
546
+ session['chat_history'] = []
547
  try:
 
548
  reader = PyPDF2.PdfReader(pdf_file)
549
  full_text = "".join([p.extract_text() or "" for p in reader.pages])
550
+ splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
 
 
 
 
 
 
551
  chunks = splitter.split_text(full_text)
552
 
553
+ # IMPORTANT: For a real app, you'd want to save this vector_db
554
+ # globally or to disk so you don't re-index on every follow-up question.
555
  vector_db = FAISS.from_texts(chunks, embeddings)
556
+ # NOTE(review): despite its name, global_retriever below is a plain
+ # local — it is discarded when this request ends, so follow-up
+ # questions without a re-upload cannot reach it; persist the index
+ # (module-level, app config, or on disk) instead.
557
+ global_retriever = vector_db.as_retriever(search_kwargs={"k": 3})
558
+ except Exception as e:
559
+ return render_template("rag.html", answer=f"Upload Error: {str(e)}")
560
 
561
+ if query:
562
+ try:
563
+ # A) Rephrase the question using history
564
+ standalone_query = get_standalone_question(query, session['chat_history'])
565
+
566
+ # B) Retrieve docs using the rephrased query
567
+ # (NOTE(review): global_retriever is only a local in the upload
+ # branch above — on any POST without a fresh PDF this name is
+ # undefined and the next line raises NameError.)
568
+ docs = global_retriever.invoke(standalone_query)
569
+ sources = [doc.page_content for doc in docs]
570
+ context = format_docs(docs)
 
571
 
572
+ # C) Generate Answer
573
+ full_prompt = prompt.format(context=context, input=query)
574
+ raw_answer = llm.invoke(full_prompt)
575
+ answer = extract_answer(raw_answer)
576
 
577
+ # D) Update Session History
578
+ updated_history = session['chat_history']
579
+ updated_history.append({"role": "user", "content": query})
580
+ updated_history.append({"role": "assistant", "content": answer})
581
+
582
+ # Keep only last 6 messages (3 turns) to stay within LLM limits
583
+ session['chat_history'] = updated_history[-6:]
584
+ session.modified = True
585
 
586
  except Exception as e:
587
  print("RAG Error:", e)
588
  answer = f"System Error: {str(e)}"
 
 
 
 
589
 
590
+ return render_template("rag.html",
591
+ query=query,
592
+ answer=answer,
593
+ sources=sources,
594
+ history=session.get('chat_history', []))
595
  # ====== EXISTING DISCOVERY ROUTE (PRESERVED) ======
596
 
597
  @main_bp.route('/discovery')
Templates/rag.html CHANGED
@@ -3,20 +3,7 @@
3
  {% block content %}
4
  <div class="min-h-screen bg-[#f8fafc] p-8 font-sans">
5
  <div class="max-w-7xl mx-auto">
6
- <div class="mb-10 flex items-center justify-between">
7
- <div class="flex items-center gap-3">
8
- <div class="bg-blue-600 p-2 rounded-lg">
9
- <i class="fas fa-microscope text-white text-xl"></i>
10
- </div>
11
- <h1 class="text-2xl font-bold text-[#1e293b] tracking-tight">Qsearch Intelligence</h1>
12
- </div>
13
- <div class="flex gap-6 text-sm font-medium text-slate-500">
14
- <span class="hover:text-blue-600 cursor-pointer flex items-center gap-2"><i class="fas fa-database"></i> Artifacts</span>
15
- <span class="hover:text-blue-600 cursor-pointer flex items-center gap-2"><i class="fas fa-file-alt"></i> Publications</span>
16
- <span class="hover:text-blue-600 cursor-pointer flex items-center gap-2"><i class="fas fa-graduation-cap"></i> Scholar</span>
17
- <button class="bg-slate-100 px-4 py-2 rounded-md text-slate-700 hover:bg-slate-200 transition-all">Analysis Tools <i class="fas fa-chevron-down ml-1 text-xs"></i></button>
18
- </div>
19
- </div>
20
 
21
  <div class="bg-white rounded-3xl shadow-sm border border-slate-200 p-2 mb-10">
22
  <form action="/rag" method="POST" enctype="multipart/form-data" onsubmit="return showLoading()" class="flex items-center gap-2">
 
3
  {% block content %}
4
  <div class="min-h-screen bg-[#f8fafc] p-8 font-sans">
5
  <div class="max-w-7xl mx-auto">
6
+
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  <div class="bg-white rounded-3xl shadow-sm border border-slate-200 p-2 mb-10">
9
  <form action="/rag" method="POST" enctype="multipart/form-data" onsubmit="return showLoading()" class="flex items-center gap-2">