Spaces:

Zeri00
/

Cogni-chat-document-reader

Sleeping

App Files Files Community

riteshraut committed on Oct 28, 2025

Commit

bd93e48

1 Parent(s): 5fd359f

new feature

Browse files

Files changed (5) hide show

app.py +282 -152
evaluate.py +205 -0
query_expansion.py +524 -0
rag_processor.py +382 -78
templates/index.html +880 -610

app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import os
-import time
 import uuid
-from flask import Flask, request, render_template, session, jsonify, Response
 from werkzeug.utils import secure_filename
 from rag_processor import create_rag_chain
 from typing import Sequence, Any, List
@@ -10,10 +9,7 @@ import re
 import io
 from gtts import gTTS
 from langchain_core.documents import Document
-from langchain_community.document_loaders import (
-    TextLoader,
-    Docx2txtLoader,
-)
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
@@ -23,124 +19,203 @@ from langchain_community.retrievers import BM25Retriever
 from langchain_community.chat_message_histories import ChatMessageHistory
 from langchain.storage import InMemoryStore
 from sentence_transformers.cross_encoder import CrossEncoder
 app = Flask(__name__)
 app.config['SECRET_KEY'] = os.urandom(24)
 class LocalReranker(BaseDocumentCompressor):
     model: Any
-    top_n: int = 3
     class Config:
         arbitrary_types_allowed = True
-    def compress_documents(
-        self,
-        documents: Sequence[Document],
-        query: str,
-        callbacks=None,
-    ) -> Sequence[Document]:
         if not documents:
             return []
         pairs = [[query, doc.page_content] for doc in documents]
         scores = self.model.predict(pairs, show_progress_bar=False)
         doc_scores = list(zip(documents, scores))
-        sorted_doc_scores = sorted(doc_scores, key=lambda x: x[1], reverse=True)
         top_docs = []
-        for doc, score in sorted_doc_scores[:self.top_n]:
             doc.metadata['rerank_score'] = float(score)
             top_docs.append(doc)
         return top_docs
-is_hf_spaces = bool(os.getenv("SPACE_ID") or os.getenv("SPACES_ZERO_GPU"))
-if is_hf_spaces:
-    app.config['UPLOAD_FOLDER'] = '/tmp/uploads'
-else:
-    app.config['UPLOAD_FOLDER'] = 'uploads'
 try:
     os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
-    print(f"Upload folder ready: {app.config['UPLOAD_FOLDER']}")
 except Exception as e:
-    print(f"Failed to create upload folder {app.config['UPLOAD_FOLDER']}: {e}")
     app.config['UPLOAD_FOLDER'] = '/tmp/uploads'
     os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
-    print(f"Using fallback upload folder: {app.config['UPLOAD_FOLDER']}")
-rag_chains = {}
 message_histories = {}
-print("Loading embedding model...")
 try:
-    EMBEDDING_MODEL = HuggingFaceEmbeddings(
-        model_name="sentence-transformers/all-MiniLM-L6-v2",
-        model_kwargs={'device': 'cpu'}
-    )
-    print("Embedding model loaded successfully.")
 except Exception as e:
-    print(f"FATAL: Could not load embedding model. Error: {e}")
     raise e
-print("Loading local re-ranking model...")
 try:
-    RERANKER_MODEL = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2", device='cpu')
-    print("Re-ranking model loaded successfully.")
 except Exception as e:
-    print(f"FATAL: Could not load reranker model. Error: {e}")
     raise e
 def load_pdf_with_fallback(filepath):
     try:
         docs = []
         with fitz.open(filepath) as pdf_doc:
-            for page_num, page in enumerate(pdf_doc):
                 text = page.get_text()
                 if text.strip():
-                    docs.append(Document(
-                        page_content=text,
-                        metadata={
-                            "source": os.path.basename(filepath),
-                            "page": page_num + 1,
-                        }
-                    ))
         if docs:
-            print(f"Successfully loaded PDF with PyMuPDF: {filepath}")
             return docs
         else:
-            raise ValueError("No text content found in PDF.")
     except Exception as e:
-        print(f"PyMuPDF failed for {filepath}: {e}")
         raise
-LOADER_MAPPING = {
-    ".txt": TextLoader,
-    ".pdf": load_pdf_with_fallback,
-    ".docx": Docx2txtLoader,
-}
 def get_session_history(session_id: str) -> ChatMessageHistory:
     if session_id not in message_histories:
         message_histories[session_id] = ChatMessageHistory()
     return message_histories[session_id]
 @app.route('/health', methods=['GET'])
 def health_check():
-    return jsonify({'status': 'healthy'}), 200
 @app.route('/', methods=['GET'])
 def index():
     return render_template('index.html')
 @app.route('/upload', methods=['POST'])
 def upload_files():
     files = request.files.getlist('file')
-    if not files or all(f.filename == '' for f in files):
-        return jsonify({'status': 'error', 'message': 'No selected files.'}), 400
-    all_docs = []
-    processed_files, failed_files = [], []
     for file in files:
         if file and file.filename:
             filename = secure_filename(file.filename)
@@ -149,147 +224,202 @@ def upload_files():
                 file.save(filepath)
                 file_ext = os.path.splitext(filename)[1].lower()
                 if file_ext not in LOADER_MAPPING:
-                    raise ValueError("Unsupported file format.")
                 loader_func = LOADER_MAPPING[file_ext]
-                docs = loader_func(filepath) if file_ext == ".pdf" else loader_func(filepath).load()
                 if not docs:
-                    raise ValueError("No content extracted.")
                 all_docs.extend(docs)
                 processed_files.append(filename)
-                print(f"✓ Successfully processed: {filename}")
             except Exception as e:
-                error_msg = str(e)
-                print(f"✗ Error processing {filename}: {error_msg}")
-                failed_files.append(f"{filename} ({error_msg})")
     if not all_docs:
-        error_summary = "Failed to process all files."
-        if failed_files:
-            error_summary += " Reasons: " + ", ".join(failed_files)
-        return jsonify({'status': 'error', 'message': error_summary}), 400
     try:
-        print("Starting RAG pipeline setup...")
-        vectorstore = FAISS.from_texts([""], EMBEDDING_MODEL)
         store = InMemoryStore()
-        parent_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=200)
-        child_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=50)
-        parent_docs = parent_splitter.split_documents(all_docs)
-        doc_ids = [str(uuid.uuid4()) for _ in parent_docs]
-        child_docs = []
-        for i, doc in enumerate(parent_docs):
-            _id = doc_ids[i]
-            sub_docs = child_splitter.split_documents([doc])
-            for child in sub_docs:
-                child.metadata["doc_id"] = _id
-            child_docs.extend(sub_docs)
         store.mset(list(zip(doc_ids, parent_docs)))
-        vectorstore.add_documents(child_docs)
-        print(f"Stored {len(parent_docs)} parent docs and indexed {len(child_docs)} child docs.")
         bm25_retriever = BM25Retriever.from_documents(child_docs)
-        bm25_retriever.k = 8
-        faiss_retriever = vectorstore.as_retriever(search_kwargs={"k": 8})
-        ensemble_retriever = EnsembleRetriever(
-            retrievers=[bm25_retriever, faiss_retriever],
-            weights=[0.4, 0.6]
-        )
-        print("Created Hybrid Retriever for child documents.")
-        reranker = LocalReranker(model=RERANKER_MODEL, top_n=4)
         def get_parents(docs: List[Document]) -> List[Document]:
-            parent_ids = {d.metadata["doc_id"] for d in docs}
-            return store.mget(list(parent_ids))
-        compression_retriever = ContextualCompressionRetriever(
-            base_compressor=reranker, base_retriever=ensemble_retriever
-        )
         final_retriever = compression_retriever | get_parents
-        print("Final retriever chain created: (Hybrid -> Rerank) -> Parent Fetch")
         session_id = str(uuid.uuid4())
-        rag_chain = create_rag_chain(final_retriever, get_session_history)
-        rag_chains[session_id] = rag_chain
-        session['session_id'] = session_id
-        success_msg = f"Successfully processed: {', '.join(processed_files)}"
         if failed_files:
-            success_msg += f"\nFailed to process: {', '.join(failed_files)}"
         return jsonify({
-            'status': 'success',
             'filename': success_msg,
-            'session_id': session_id
-        })
     except Exception as e:
         import traceback
         traceback.print_exc()
-        return jsonify({'status': 'error', 'message': f'Failed during RAG setup: {e}'}), 500
-@app.route('/chat', methods=['POST'])
 def chat():
-    data = request.get_json()
-    question = data.get('question')
-    session_id = session.get('session_id') or data.get('session_id')
-    if not question or not session_id or session_id not in rag_chains:
-        return jsonify({'status': 'error', 'message': 'Invalid session or no question provided.'}), 400
-    try:
-        rag_chain = rag_chains[session_id]
-        config = {"configurable": {"session_id": session_id}}
-        answer_string = rag_chain.invoke({"question": question}, config=config)
-        return jsonify({'answer': answer_string})
     except Exception as e:
-        print(f"Error during chat invocation: {e}")
-        return jsonify({'status': 'error', 'message': 'An error occurred while getting the answer.'}), 500
 def clean_markdown_for_tts(text: str) -> str:
-    text = re.sub(r'\*(\*?)(.*?)\1\*', r'\2', text)
-    text = re.sub(r'\_(.*?)\_', r'\1', text)
-    text = re.sub(r'`(.*?)`', r'\1', text)
-    text = re.sub(r'^\s*#{1,6}\s+', '', text, flags=re.MULTILINE)
-    text = re.sub(r'^\s*[\*\-]\s+', '', text, flags=re.MULTILINE)
-    text = re.sub(r'^\s*\d+\.\s+', '', text, flags=re.MULTILINE)
-    text = re.sub(r'^\s*>\s?', '', text, flags=re.MULTILINE)
-    text = re.sub(r'^\s*[-*_]{3,}\s*$', '', text, flags=re.MULTILINE)
-    text = re.sub(r'\n+', ' ', text)
     return text.strip()
 @app.route('/tts', methods=['POST'])
 def text_to_speech():
     data = request.get_json()
     text = data.get('text')
     if not text:
-        return jsonify({'status': 'error', 'message': 'No text provided.'}), 400
     try:
         clean_text = clean_markdown_for_tts(text)
         tts = gTTS(clean_text, lang='en')
         mp3_fp = io.BytesIO()
         tts.write_to_fp(mp3_fp)
         mp3_fp.seek(0)
         return Response(mp3_fp, mimetype='audio/mpeg')
     except Exception as e:
-        print(f"Error in TTS generation: {e}")
-        return jsonify({'status': 'error', 'message': 'Failed to generate audio.'}), 500
 if __name__ == '__main__':
-    port = int(os.environ.get("PORT", 7860))
-    app.run(host="0.0.0.0", port=port, debug=False)

 import os
 import uuid
+from flask import Flask, request, render_template, session, jsonify, Response, stream_with_context
 from werkzeug.utils import secure_filename
 from rag_processor import create_rag_chain
 from typing import Sequence, Any, List
 import io
 from gtts import gTTS
 from langchain_core.documents import Document
+from langchain_community.document_loaders import TextLoader, Docx2txtLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain_community.chat_message_histories import ChatMessageHistory
 from langchain.storage import InMemoryStore
 from sentence_transformers.cross_encoder import CrossEncoder
 app = Flask(__name__)
 app.config['SECRET_KEY'] = os.urandom(24)
+TEMPERATURE_LABELS = {
+    '0.2': 'Precise',
+    '0.4': 'Confident',
+    '0.6': 'Balanced',
+    '0.8': 'Flexible',
+    '1.0': 'Creative',
+    }
 class LocalReranker(BaseDocumentCompressor):
+    """Document compressor that keeps the ``top_n`` documents ranked best by ``model``."""
+    # Scoring model; compress_documents() calls its predict() on (query, text) pairs.
     model: Any
+    # Maximum number of documents returned by compress_documents().
+    top_n: int = 5
     class Config:
         arbitrary_types_allowed = True
+    def compress_documents(self, documents: Sequence[Document], query: str,
+                           callbacks=None) -> Sequence[Document]:
+        """Score every document against ``query`` and return the best ``top_n``.
+
+        The score is written into ``metadata['rerank_score']`` of each returned
+        document, so the input documents are mutated. ``callbacks`` is unused.
+        """
         if not documents:
             return []
+        # One [query, text] pair per document, scored in a single predict() call.
         pairs = [[query, doc.page_content] for doc in documents]
         scores = self.model.predict(pairs, show_progress_bar=False)
         doc_scores = list(zip(documents, scores))
+        # Highest score first.
+        sorted_doc_scores = sorted(doc_scores, key=lambda x: x[1],
+                                   reverse=True)
         top_docs = []
+        for (doc, score) in sorted_doc_scores[:self.top_n]:
             doc.metadata['rerank_score'] = float(score)
             top_docs.append(doc)
         return top_docs
+def create_optimized_parent_child_chunks(all_docs):
+    """Split documents into parent chunks and smaller child chunks linked by id.
+
+    Parents are split with chunk_size=900 / chunk_overlap=200, children with
+    chunk_size=350 / chunk_overlap=80. Each parent gets a fresh UUID, and every
+    child split from it carries that UUID in ``metadata['doc_id']`` together
+    with its position inside the parent.
+
+    Returns:
+        A ``(parent_docs, child_docs, doc_ids)`` tuple in which ``doc_ids[i]``
+        is the id of ``parent_docs[i]``; three empty lists when ``all_docs``
+        is empty.
+    """
+    if not all_docs:
+        print ('CHUNKING: No input documents provided!')
+        return ([], [], [])
+    parent_splitter = RecursiveCharacterTextSplitter(chunk_size=900,
+            chunk_overlap=200, separators=['\n\n', '\n', '. ', '! ',
+            '? ', '; ', ', ', ' ', ''])
+    child_splitter = RecursiveCharacterTextSplitter(chunk_size=350,
+            chunk_overlap=80, separators=['\n', '. ', '! ', '? ', '; ',
+            ', ', ' ', ''])
+    parent_docs = parent_splitter.split_documents(all_docs)
+    # One id per parent, index-aligned with parent_docs.
+    doc_ids = [str(uuid.uuid4()) for _ in parent_docs]
+    child_docs = []
+    for (i, parent_doc) in enumerate(parent_docs):
+        parent_id = doc_ids[i]
+        children = child_splitter.split_documents([parent_doc])
+        for (j, child) in enumerate(children):
+            child.metadata.update({'doc_id': parent_id,
+                                  'chunk_index': j,
+                                  'total_chunks': len(children),
+                                  'is_first_chunk': j == 0,
+                                  'is_last_chunk': j == len(children)
+                                  - 1})
+            # Position markers are written into the text itself, so they are
+            # part of whatever is later indexed from child_docs.
+            # NOTE(review): only the first and the LAST child are marked; the
+            # last one gets '[Continues...] ' and middle children get nothing.
+            # Confirm this is the intended labelling.
+            if len(children) > 1:
+                if j == 0:
+                    child.page_content = '[Beginning] ' + child.page_content
+                elif j == len(children) - 1:
+                    child.page_content = '[Continues...] '  + child.page_content
+            child_docs.append(child)
+    print (f"CHUNKING: Created {len(parent_docs)} parent and {len(child_docs)} child chunks."
+           )
+    return (parent_docs, child_docs, doc_ids)
+def get_context_aware_parents(docs: List[Document], store: InMemoryStore) -> List[Document]:
+    """Swap retrieved child chunks for their parent documents, most-hit parent first.
+
+    Children are grouped by ``metadata['doc_id']``; children without an id are
+    ignored. Each parent found in ``store`` is returned as a new ``Document``
+    whose text is the parent text followed by up to three of its matching child
+    excerpts, and whose metadata records how many children matched. Parents
+    missing from the store are logged and skipped.
+    """
+    if not docs:
+        return []
+    # parent id -> texts of the retrieved children that point at it
+    excerpts_by_parent = {}
+    for child in docs:
+        pid = child.metadata.get('doc_id')
+        if not pid:
+            continue
+        excerpts_by_parent.setdefault(pid, []).append(child.page_content)
+    parent_ids = list(excerpts_by_parent)
+    enhanced_parents = []
+    for (pid, parent) in zip(parent_ids, store.mget(parent_ids)):
+        if parent is None:
+            print (f"PARENT_FETCH: Parent {pid} not found in store!")
+            continue
+        hits = excerpts_by_parent[pid]
+        child_excerpts = '\n'.join(hits[:3])
+        enhanced_parents.append(Document(
+            page_content=f"{parent.page_content}\n\nRelevant excerpts:\n{child_excerpts}",
+            metadata={**parent.metadata,
+                      # Both values are the number of matching children.
+                      'child_relevance_score': len(hits),
+                      'matching_children': len(hits)}))
+    enhanced_parents.sort(key=lambda p: p.metadata.get('child_relevance_score', 0), reverse=True)
+    return enhanced_parents
+is_hf_spaces = bool(os.getenv('SPACE_ID') or os.getenv('SPACES_ZERO_GPU'
+                     ))
+app.config['UPLOAD_FOLDER'] = '/tmp/uploads' if is_hf_spaces else 'uploads'
 try:
     os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
+    print (f"Upload folder ready: {app.config['UPLOAD_FOLDER']}")
 except Exception as e:
+    print (f"Failed to create upload folder, falling back to /tmp: {e}")
     app.config['UPLOAD_FOLDER'] = '/tmp/uploads'
     os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
+session_data = {}
 message_histories = {}
+print ('Loading embedding model...')
 try:
+    EMBEDDING_MODEL = \
+        HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2'
+                              , model_kwargs={'device': 'cpu'},
+                              encode_kwargs={'normalize_embeddings': True})
+    print ('Embedding model loaded.')
 except Exception as e:
+    print (f"FATAL: Could not load embedding model. Error: {e}")
     raise e
+print ('Loading reranker model...')
 try:
+    RERANKER_MODEL = \
+        CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2',
+                     device='cpu')
+    print ('Reranker model loaded.')
 except Exception as e:
+    print (f"FATAL: Could not load reranker model. Error: {e}")
     raise e
 def load_pdf_with_fallback(filepath):
+    """Load a PDF into one ``Document`` per page that contains text.
+
+    Pages whose extracted text is empty or whitespace-only are skipped. Each
+    document's metadata holds the file's base name and the 1-based page number.
+    Despite the name, no alternative loader is tried here: any failure is
+    logged and re-raised.
+
+    Raises:
+        ValueError: if no page yields any text.
+        Exception: whatever opening or reading the file raises.
+    """
     try:
         docs = []
         with fitz.open(filepath) as pdf_doc:
+            for (page_num, page) in enumerate(pdf_doc):
                 text = page.get_text()
                 if text.strip():
+                    docs.append(Document(page_content=text,
+                                metadata={'source': os.path.basename(filepath),
+                                'page': page_num + 1}))
         if docs:
+            print (f"Loaded PDF: {os.path.basename(filepath)} - {len(docs)} pages"
+                   )
             return docs
         else:
+            raise ValueError('No text content found in PDF.')
     except Exception as e:
+        # Also catches the ValueError raised just above; log, then propagate.
+        print (f"PyMuPDF failed for {filepath}: {e}")
         raise
+LOADER_MAPPING = {'.txt': TextLoader, '.pdf': load_pdf_with_fallback,
+                  '.docx': Docx2txtLoader}
 def get_session_history(session_id: str) -> ChatMessageHistory:
+    """Return the chat history for ``session_id``, creating an empty one on first use.
+
+    Histories are kept in the module-level ``message_histories`` dict.
+    """
     if session_id not in message_histories:
         message_histories[session_id] = ChatMessageHistory()
     return message_histories[session_id]
 @app.route('/health', methods=['GET'])
 def health_check():
+    return (jsonify({'status': 'healthy'}), 200)
 @app.route('/', methods=['GET'])
 def index():
     return render_template('index.html')
 @app.route('/upload', methods=['POST'])
 def upload_files():
     files = request.files.getlist('file')
+    temperature_str = request.form.get('temperature', '0.2')
+    temperature = float(temperature_str)
+    model_name = request.form.get('model_name',
+                                  'moonshotai/kimi-k2-instruct')
+    print (f"UPLOAD: Model: {model_name}, Temp: {temperature}")
+    if not files or all(f.filename == '' for f in files):
+        return (jsonify({'status': 'error',
+                'message': 'No selected files.'}), 400)
+    (all_docs, processed_files, failed_files) = ([], [], [])
+    print (f"Processing {len(files)} file(s)...")
     for file in files:
         if file and file.filename:
             filename = secure_filename(file.filename)
                 file.save(filepath)
                 file_ext = os.path.splitext(filename)[1].lower()
                 if file_ext not in LOADER_MAPPING:
+                    raise ValueError('Unsupported file format.')
                 loader_func = LOADER_MAPPING[file_ext]
+                docs = loader_func(filepath) if file_ext == '.pdf' \
+                    else loader_func(filepath).load()
                 if not docs:
+                    raise ValueError('No content extracted.')
                 all_docs.extend(docs)
                 processed_files.append(filename)
             except Exception as e:
+                print (f"✗ Error processing {filename}: {e}")
+                failed_files.append(f"{filename} ({e})")
     if not all_docs:
+        return (jsonify({'status': 'error',
+                'message': f"Failed to process all files. Reasons: {', '.join(failed_files)}"
+                }), 400)
+    print (f"UPLOAD: Processed {len(processed_files)} files.")
     try:
+        print ('Starting RAG pipeline setup...')
+        (parent_docs, child_docs, doc_ids) = \
+            create_optimized_parent_child_chunks(all_docs)
+        if not child_docs:
+            raise ValueError('No child documents created during chunking.')
+        vectorstore = FAISS.from_documents(child_docs, EMBEDDING_MODEL)
         store = InMemoryStore()
         store.mset(list(zip(doc_ids, parent_docs)))
+        print (f"Indexed {len(child_docs)} document chunks.")
         bm25_retriever = BM25Retriever.from_documents(child_docs)
+        bm25_retriever.k = 12
+        faiss_retriever = vectorstore.as_retriever(search_kwargs={'k': 12})
+        ensemble_retriever = \
+            EnsembleRetriever(retrievers=[bm25_retriever,
+                               faiss_retriever], weights=[0.6, 0.4])
+        reranker = LocalReranker(model=RERANKER_MODEL, top_n=5)
         def get_parents(docs: List[Document]) -> List[Document]:
+            return get_context_aware_parents(docs, store)
+        compression_retriever = \
+            ContextualCompressionRetriever(base_compressor=reranker,
+                base_retriever=ensemble_retriever)
         final_retriever = compression_retriever | get_parents
         session_id = str(uuid.uuid4())
+        (rag_chain, api_key_manager) = \
+            create_rag_chain(retriever=final_retriever,
+                             get_session_history_func=get_session_history,
+                             model_name=model_name,
+                             temperature=temperature)
+        session_data[session_id] = {'chain': rag_chain,
+                                    'model_name': model_name,
+                                    'temperature': temperature,
+                                    'api_key_manager': api_key_manager}
+        success_msg = f"Processed: {', '.join(processed_files)}"
         if failed_files:
+            success_msg += f". Failed: {', '.join(failed_files)}"
+        mode_label = TEMPERATURE_LABELS.get(temperature_str,
+                temperature_str)
+        print (f"UPLOAD COMPLETE: Session {session_id} is ready.")
         return jsonify({
+            'status': 'success',
             'filename': success_msg,
+            'session_id': session_id,
+            'model_name': model_name,
+            'mode': mode_label,
+            })
     except Exception as e:
         import traceback
         traceback.print_exc()
+        return (jsonify({'status': 'error',
+                'message': f'RAG setup failed: {e}'}), 500)
+@app.route('/chat', methods=['POST', 'GET'])
 def chat():
+    """Stream an answer to a question asked against an uploaded-document session.
+
+    GET reads ``question`` and ``session_id`` from the query string; POST reads
+    them from the JSON body, falling back to the Flask session for the id.
+    On success the response is a ``text/event-stream`` whose events are JSON
+    objects with ``token``, ``model_name`` and ``mode`` keys. A failure while
+    generating is reported in-stream as an ``error`` event.
+
+    Returns:
+        A streaming ``Response``. On validation or setup failure: an SSE error
+        event for GET, or a JSON error body for POST, with status 400 or 500.
+    """
+    # Imported locally so this block does not rely on the module import list.
+    import json
+    def sse_event(payload):
+        # json.dumps escapes quotes, backslashes, newlines and other control
+        # characters, so each event is one valid JSON object on a single line.
+        return f'data: {json.dumps(payload, ensure_ascii=False)}\n\n'
+    def error_response(message, status):
+        # GET callers receive an event stream, POST callers a JSON body.
+        if request.method == 'GET':
+            def error_stream():
+                yield sse_event({'error': message})
+            return Response(stream_with_context(error_stream()), mimetype='text/event-stream', status=status)
+        return (jsonify({'status': 'error', 'message': message}), status)
+    if request.method == 'GET':
+        question = request.args.get('question')
+        session_id = request.args.get('session_id')
+    elif request.method == 'POST':
+        # silent=True: a missing or malformed JSON body falls through to the
+        # 400 validation below instead of raising inside get_json().
+        data = request.get_json(silent=True)
+        if not isinstance(data, dict):
+            data = {}
+        question = data.get('question')
+        session_id = data.get('session_id') or session.get('session_id')
+    else:
+        return (jsonify({'status': 'error', 'message': 'Method not allowed'}), 405)
+    # question may be None at this point, so only slice a real string.
+    preview = question[:50] if isinstance(question, str) else ''
+    print(f"Received {request.method} request for chat: session={session_id}, question={preview}...")
+    if not question:
+        error_msg = "Error: No question provided."
+        print(f"CHAT Validation Error: {error_msg}")
+        return error_response(error_msg, 400)
+    if not session_id or session_id not in session_data:
+        print(f"CHAT Validation Error: Invalid session {session_id}.")
+        return error_response("Error: Invalid session. Please upload documents first.", 400)
+    try:
+        session_info = session_data[session_id]
+        rag_chain = session_info['chain']
+        model_name = session_info['model_name']
+        temperature_float = session_info['temperature']
+        temperature_str = str(temperature_float)
+        mode_label = TEMPERATURE_LABELS.get(temperature_str, temperature_str)
+        print (f"CHAT: Streaming response for session {session_id} (Model: {model_name}, Temp: {temperature_float})...")
+        def generate_chunks():
+            try:
+                stream_iterator = rag_chain.stream({'question': question},
+                        config={'configurable': {'session_id': session_id}})
+                for chunk in stream_iterator:
+                    if isinstance(chunk, str):
+                        yield sse_event({'token': chunk, 'model_name': model_name, 'mode': mode_label})
+                    else:
+                        print(f"Received non-string chunk: {type(chunk)}")
+                print ('CHAT: Streaming finished successfully.')
+            except Exception as e:
+                # Headers are already sent once streaming starts, so the
+                # failure is reported as an event rather than a status code.
+                print(f"CHAT Error during streaming generation: {e}")
+                import traceback
+                traceback.print_exc()
+                yield sse_event({'error': f"Error during response generation: {e}"})
+        return Response(stream_with_context(generate_chunks()), mimetype='text/event-stream')
     except Exception as e:
+        print(f"CHAT Setup Error: {e}")
+        import traceback
+        traceback.print_exc()
+        return error_response(f"Error setting up chat stream: {e}", 500)
 def clean_markdown_for_tts(text: str) -> str:
+    """Strip Markdown syntax from ``text`` so it can be read aloud.
+
+    Links and images are reduced to their visible label, emphasis/code/heading
+    characters are dropped, horizontal rules, list bullets, list numbers and
+    quote markers are removed, and all whitespace is collapsed to single
+    spaces.
+
+    Returns:
+        The cleaned, stripped text; an empty string if nothing speakable remains.
+    """
+    # Keep the label of [label](url) and ![alt](url): only the URL is noise,
+    # deleting the whole link would drop words from the spoken sentence.
+    text = re.sub(r'!?\[(.*?)\]\(.*?\)', r'\1', text)
+    text = re.sub(r'[`*_#]', '', text)
+    # '---' rules survive the character strip above, so remove them explicitly.
+    text = re.sub(r'^\s*-{3,}\s*$', '', text, flags=re.MULTILINE)
+    text = re.sub(r'^\s*[\-\*\+]\s+', '', text, flags=re.MULTILINE)
+    text = re.sub(r'^\s*\d+\.\s+', '', text, flags=re.MULTILINE)
+    text = re.sub(r'^\s*>\s?', '', text, flags=re.MULTILINE)
+    text = re.sub(r'\n+', ' ', text)
+    text = re.sub(r'\s{2,}', ' ', text)
     return text.strip()
 @app.route('/tts', methods=['POST'])
 def text_to_speech():
+    """Convert the posted ``text`` field to English speech and return MP3 audio.
+
+    Returns:
+        An ``audio/mpeg`` response on success; a JSON error with status 400
+        when no usable text was sent, or 500 when audio generation fails.
+    """
+    # silent=True: a missing or malformed JSON body gives None instead of
+    # raising, and a non-object body is treated as "no text" rather than
+    # crashing on .get().
+    data = request.get_json(silent=True)
+    text = data.get('text') if isinstance(data, dict) else None
     if not text:
+        return (jsonify({'status': 'error',
+                'message': 'No text provided.'}), 400)
     try:
         clean_text = clean_markdown_for_tts(text)
+        # Input made only of Markdown syntax leaves nothing to speak.
+        if not clean_text:
+            return (jsonify({'status': 'error', 'message': 'No speakable text found.'}), 400)
         tts = gTTS(clean_text, lang='en')
+        # Build the MP3 in memory; rewind so the response reads from the start.
         mp3_fp = io.BytesIO()
         tts.write_to_fp(mp3_fp)
         mp3_fp.seek(0)
         return Response(mp3_fp, mimetype='audio/mpeg')
     except Exception as e:
+        print (f"TTS Error: {e}")
+        return (jsonify({'status': 'error',
+                'message': 'Failed to generate audio.'}), 500)
 if __name__ == '__main__':
+    port = int(os.environ.get('PORT', 7860))
+    print (f"Starting Flask app on port {port}")
+    app.run(host='0.0.0.0', port=port, debug=False, threaded=True)

evaluate.py ADDED Viewed

	@@ -0,0 +1,205 @@

+import os
+import asyncio
+import uuid
+from dotenv import load_dotenv
+from datasets import Dataset
+import pandas as pd
+from typing import Sequence, Any, List
+# Ragas and LangChain components
+from ragas import evaluate
+from ragas.metrics import (
+    faithfulness,
+    answer_relevancy,
+    context_recall,
+    context_precision,
+)
+from ragas.testset import TestsetGenerator
+# NOTE: The 'evolutions' import has been completely removed.
+# Your specific RAG components from app.py
+from langchain_groq import ChatGroq
+from langchain_community.document_loaders import PyMuPDFLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain.storage import InMemoryStore
+from langchain_community.retrievers import BM25Retriever
+from langchain.retrievers import EnsembleRetriever, ContextualCompressionRetriever
+from langchain.retrievers.document_compressors.base import BaseDocumentCompressor
+from langchain_core.documents import Document
+from sentence_transformers.cross_encoder import CrossEncoder
+from rag_processor import create_rag_chain
+from langchain_community.chat_message_histories import ChatMessageHistory
+import fitz
+# Load environment variables
+load_dotenv()
+# --- Re-implementing LocalReranker from app.py ---
+class LocalReranker(BaseDocumentCompressor):
+    """Document compressor that keeps the ``top_n`` documents ranked best by ``model``."""
+    # Scoring model; compress_documents() calls its predict() on (query, text) pairs.
+    model: Any
+    # Maximum number of documents returned (the copy in app.py defaults to 5).
+    top_n: int = 3
+    class Config:
+        arbitrary_types_allowed = True
+    def compress_documents(self, documents: Sequence[Document], query: str, callbacks=None) -> Sequence[Document]:
+        """Score every document against ``query`` and return the best ``top_n``.
+
+        The score is written into ``metadata['rerank_score']`` of each returned
+        document, so the input documents are mutated. ``callbacks`` is unused.
+        """
+        if not documents: return []
+        pairs = [[query, doc.page_content] for doc in documents]
+        scores = self.model.predict(pairs, show_progress_bar=False)
+        doc_scores = list(zip(documents, scores))
+        # Highest score first.
+        sorted_doc_scores = sorted(doc_scores, key=lambda x: x[1], reverse=True)
+        top_docs = []
+        for doc, score in sorted_doc_scores[:self.top_n]:
+            doc.metadata['rerank_score'] = float(score)
+            top_docs.append(doc)
+        return top_docs
+# --- Helper Functions ---
+def load_pdf_with_fallback(filepath):
+    """Load a PDF with PyMuPDF into one ``Document`` per page that contains text.
+
+    Pages whose extracted text is empty or whitespace-only are skipped. Each
+    document's metadata holds the file's base name and the 1-based page number.
+    Despite the name, no alternative loader is tried: failures are logged and
+    re-raised.
+
+    Raises:
+        ValueError: if no page yields any text.
+        Exception: whatever opening or reading the file raises.
+    """
+    try:
+        docs = []
+        with fitz.open(filepath) as pdf_doc:
+            for page_num, page in enumerate(pdf_doc):
+                text = page.get_text()
+                if text.strip():
+                    docs.append(Document(
+                        page_content=text,
+                        metadata={"source": os.path.basename(filepath), "page": page_num + 1}
+                    ))
+        if docs:
+            print(f"✓ Successfully loaded PDF: {filepath}")
+            return docs
+        else:
+            raise ValueError("No text content found in PDF.")
+    except Exception as e:
+        # Also catches the ValueError raised just above; log, then propagate.
+        print(f"✗ PyMuPDF failed for {filepath}: {e}")
+        raise
+async def main():
+    """Main execution function"""
+    print("\n" + "="*60 + "\nSTARTING RAGAS EVALUATION\n" + "="*60)
+    pdf_path = "uploads/Unit_-_1_Introduction.pdf"
+    if not os.path.exists(pdf_path):
+        print(f"✗ Error: PDF not found at {pdf_path}")
+        return
+    try:
+        # --- 1. Setup Models ---
+        print("\n--- 1. Initializing Models ---")
+        groq_api_key = os.getenv("GROQ_API_KEY")
+        if not groq_api_key or groq_api_key == "your_groq_api_key_here":
+            raise ValueError("GROQ_API_KEY not found or is a placeholder.")
+        generator_llm = ChatGroq(model="llama-3.1-8b-instant", api_key=groq_api_key)
+        critic_llm = ChatGroq(model="llama-3.1-70b-versatile", api_key=groq_api_key)
+        embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+        reranker_model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2", device='cpu')
+        print("✓ Models initialized.")
+        # --- 2. Setup RAG Pipeline ---
+        print("\n--- 2. Setting up RAG Pipeline ---")
+        documents = load_pdf_with_fallback(pdf_path)
+        # Split documents
+        parent_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=400)
+        child_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=50)
+        parent_docs = parent_splitter.split_documents(documents)
+        doc_ids = [str(uuid.uuid4()) for _ in parent_docs]
+        child_docs = []
+        for i, doc in enumerate(parent_docs):
+            _id = doc_ids[i]
+            sub_docs = child_splitter.split_documents([doc])
+            for child in sub_docs:
+                child.metadata["doc_id"] = _id
+            child_docs.extend(sub_docs)
+        store = InMemoryStore()
+        store.mset(list(zip(doc_ids, parent_docs)))
+        vectorstore = FAISS.from_documents(child_docs, embedding_model)
+        bm25_retriever = BM25Retriever.from_documents(child_docs, k=10)
+        faiss_retriever = vectorstore.as_retriever(search_kwargs={"k": 10})
+        ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, faiss_retriever], weights=[0.4, 0.6])
+        reranker = LocalReranker(model=reranker_model, top_n=5)
+        compression_retriever = ContextualCompressionRetriever(base_compressor=reranker, base_retriever=ensemble_retriever)
+        def get_parents(docs: List[Document]) -> List[Document]:
+            parent_ids = {d.metadata["doc_id"] for d in docs}
+            return store.mget(list(parent_ids))
+        final_retriever = compression_retriever | get_parents
+        message_histories = {}
+        def get_session_history(session_id: str):
+            if session_id not in message_histories:
+                message_histories[session_id] = ChatMessageHistory()
+            return message_histories[session_id]
+        rag_chain = create_rag_chain(final_retriever, get_session_history)
+        print("✓ RAG chain created successfully.")
+        # --- 3. Generate Testset ---
+        print("\n--- 3. Generating Test Questions ---")
+        generator = TestsetGenerator.from_langchain(generator_llm, critic_llm, embedding_model)
+        # Generate a simple test set without complex distributions
+        testset = generator.generate_with_langchain_docs(documents, testset_size=5)
+        print("✓ Testset generated.")
+        # --- 4. Run RAG Chain on Testset ---
+        print("\n--- 4. Running RAG Chain to Generate Answers ---")
+        test_questions = [item['question'] for item in testset.to_pandas().to_dict('records')]
+        ground_truths = [item['ground_truth'] for item in testset.to_pandas().to_dict('records')]
+        answers = []
+        contexts = []
+        for i, question in enumerate(test_questions):
+            print(f"  Processing question {i+1}/{len(test_questions)}...")
+            # Retrieve contexts
+            retrieved_docs = final_retriever.invoke(question)
+            contexts.append([doc.page_content for doc in retrieved_docs])
+            # Get answer from chain
+            config = {"configurable": {"session_id": str(uuid.uuid4())}}
+            answer = await rag_chain.ainvoke({"question": question}, config=config)
+            answers.append(answer)
+        # --- 5. Evaluate with Ragas ---
+        print("\n--- 5. Evaluating Results with Ragas ---")
+        eval_data = {
+            'question': test_questions,
+            'answer': answers,
+            'contexts': contexts,
+            'ground_truth': ground_truths
+        }
+        eval_dataset = Dataset.from_dict(eval_data)
+        result = evaluate(
+            eval_dataset,
+            metrics=[faithfulness, answer_relevancy, context_precision, context_recall],
+            llm=critic_llm,
+            embeddings=embedding_model
+        )
+        print("\n" + "="*60 + "\nEVALUATION RESULTS\n" + "="*60)
+        print(result)
+        # --- 6. Save Results ---
+        print("\n--- 6. Saving Results ---")
+        results_df = result.to_pandas()
+        results_df.to_csv("evaluation_results.csv", index=False)
+        print("✓ Evaluation results saved to evaluation_results.csv")
+        print("\n" + "="*60 + "\nEVALUATION COMPLETE!\n" + "="*60)
+    except Exception as e:
+        print(f"\n✗ An error occurred during the process: {e}")
+        import traceback
+        traceback.print_exc()
+# Start the async evaluation only when this file is executed as a script.
+if __name__ == "__main__":
+    asyncio.run(main())

query_expansion.py ADDED Viewed

	@@ -0,0 +1,524 @@

+"""
+Query Expansion System for CogniChat RAG Application
+This module implements advanced query expansion techniques to improve retrieval quality:
+- QueryAnalyzer: Extracts intent, entities, and keywords
+- QueryRephraser: Generates natural language variations
+- MultiQueryExpander: Creates diverse query formulations
+- MultiHopReasoner: Connects concepts across documents
+- FallbackStrategies: Handles edge cases gracefully
+Author: CogniChat Team
+Date: October 19, 2025
+"""
+import re
+from typing import List, Dict, Any, Optional
+from dataclasses import dataclass
+from enum import Enum
+class QueryStrategy(Enum):
+    """Query expansion strategies with different complexity levels."""
+    QUICK = "quick"  # 2 queries - fast, minimal expansion
+    BALANCED = "balanced"  # 3-4 queries - good balance
+    COMPREHENSIVE = "comprehensive"  # 5-6 queries - maximum coverage
+@dataclass
+class QueryAnalysis:
+    """Results from query analysis."""
+    intent: str  # question, definition, comparison, explanation, etc.
+    entities: List[str]  # Named entities extracted
+    keywords: List[str]  # Important keywords
+    complexity: str  # simple, medium, complex
+    domain: Optional[str] = None  # Technical domain if detected
+@dataclass
+class ExpandedQuery:
+    """Container for expanded query variations."""
+    original: str
+    variations: List[str]
+    strategy_used: QueryStrategy
+    analysis: QueryAnalysis
+class QueryAnalyzer:
+    """
+    Analyzes queries to extract intent, entities, and key information.
+    Uses LLM-based analysis for intelligent query understanding.
+    """
+    def __init__(self, llm=None):
+        """
+        Initialize QueryAnalyzer.
+        Args:
+            llm: Optional LangChain LLM for advanced analysis
+        """
+        self.llm = llm
+        self.intent_patterns = {
+            'definition': r'\b(what is|define|meaning of|definition)\b',
+            'how_to': r'\b(how to|how do|how can|steps to)\b',
+            'comparison': r'\b(compare|difference|versus|vs|better than)\b',
+            'explanation': r'\b(why|explain|reason|cause)\b',
+            'listing': r'\b(list|enumerate|what are|types of)\b',
+            'example': r'\b(example|instance|sample|case)\b',
+        }
+    def analyze(self, query: str) -> QueryAnalysis:
+        """
+        Analyze query to extract intent, entities, and keywords.
+        Args:
+            query: User's original query
+        Returns:
+            QueryAnalysis object with extracted information
+        """
+        query_lower = query.lower()
+        # Detect intent
+        intent = self._detect_intent(query_lower)
+        # Extract entities (simplified - can be enhanced with NER)
+        entities = self._extract_entities(query)
+        # Extract keywords
+        keywords = self._extract_keywords(query)
+        # Assess complexity
+        complexity = self._assess_complexity(query, entities, keywords)
+        # Detect domain
+        domain = self._detect_domain(query_lower)
+        return QueryAnalysis(
+            intent=intent,
+            entities=entities,
+            keywords=keywords,
+            complexity=complexity,
+            domain=domain
+        )
+    def _detect_intent(self, query_lower: str) -> str:
+        """Detect query intent using pattern matching."""
+        for intent, pattern in self.intent_patterns.items():
+            if re.search(pattern, query_lower):
+                return intent
+        return 'general'
+    def _extract_entities(self, query: str) -> List[str]:
+        """Extract named entities (simplified version)."""
+        # Look for capitalized words (potential entities)
+        words = query.split()
+        entities = []
+        for word in words:
+            # Skip common words at sentence start
+            if word[0].isupper() and word.lower() not in ['what', 'how', 'why', 'when', 'where', 'which']:
+                entities.append(word)
+        # Look for quoted terms
+        quoted = re.findall(r'"([^"]+)"', query)
+        entities.extend(quoted)
+        return list(set(entities))
+    def _extract_keywords(self, query: str) -> List[str]:
+        """Extract important keywords from query."""
+        # Remove stop words (simplified list)
+        stop_words = {
+            'a', 'an', 'the', 'is', 'are', 'was', 'were', 'be', 'been',
+            'what', 'how', 'why', 'when', 'where', 'which', 'who',
+            'do', 'does', 'did', 'can', 'could', 'should', 'would',
+            'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'
+        }
+        # Split and filter
+        words = re.findall(r'\b\w+\b', query.lower())
+        keywords = [w for w in words if w not in stop_words and len(w) > 2]
+        return keywords[:10]  # Limit to top 10
+    def _assess_complexity(self, query: str, entities: List[str], keywords: List[str]) -> str:
+        """Assess query complexity."""
+        word_count = len(query.split())
+        entity_count = len(entities)
+        keyword_count = len(keywords)
+        # Simple scoring
+        score = word_count + (entity_count * 2) + (keyword_count * 1.5)
+        if score < 15:
+            return 'simple'
+        elif score < 30:
+            return 'medium'
+        else:
+            return 'complex'
+    def _detect_domain(self, query_lower: str) -> Optional[str]:
+        """Detect technical domain if present."""
+        domains = {
+            'programming': ['code', 'function', 'class', 'variable', 'algorithm', 'debug'],
+            'data_science': ['model', 'dataset', 'training', 'prediction', 'accuracy'],
+            'machine_learning': ['neural', 'network', 'learning', 'ai', 'deep learning'],
+            'web': ['html', 'css', 'javascript', 'api', 'frontend', 'backend'],
+            'database': ['sql', 'query', 'database', 'table', 'index'],
+            'security': ['encryption', 'authentication', 'vulnerability', 'attack'],
+        }
+        for domain, keywords in domains.items():
+            if any(kw in query_lower for kw in keywords):
+                return domain
+        return None
+class QueryRephraser:
+    """
+    Generates natural language variations of queries using multiple strategies.
+    """
+    def __init__(self, llm=None):
+        """
+        Initialize QueryRephraser.
+        Args:
+            llm: LangChain LLM for generating variations
+        """
+        self.llm = llm
+    def generate_variations(
+        self,
+        query: str,
+        analysis: QueryAnalysis,
+        strategy: QueryStrategy = QueryStrategy.BALANCED
+    ) -> List[str]:
+        """
+        Generate query variations based on strategy.
+        Args:
+            query: Original query
+            analysis: Query analysis results
+            strategy: Expansion strategy to use
+        Returns:
+            List of query variations
+        """
+        variations = [query]  # Always include original
+        if strategy == QueryStrategy.QUICK:
+            # Just add synonym variation
+            variations.append(self._synonym_variation(query, analysis))
+        elif strategy == QueryStrategy.BALANCED:
+            # Add synonym, expanded, and simplified versions
+            variations.append(self._synonym_variation(query, analysis))
+            variations.append(self._expanded_variation(query, analysis))
+            variations.append(self._simplified_variation(query, analysis))
+        elif strategy == QueryStrategy.COMPREHENSIVE:
+            # Add all variations
+            variations.append(self._synonym_variation(query, analysis))
+            variations.append(self._expanded_variation(query, analysis))
+            variations.append(self._simplified_variation(query, analysis))
+            variations.append(self._keyword_focused(query, analysis))
+            variations.append(self._context_variation(query, analysis))
+            # Add one more: alternate phrasing
+            if analysis.intent in ['how_to', 'explanation']:
+                variations.append(f"Guide to {' '.join(analysis.keywords[:3])}")
+        # Remove duplicates and None values
+        variations = [v for v in variations if v]
+        return list(dict.fromkeys(variations))  # Preserve order, remove dupes
+    def _synonym_variation(self, query: str, analysis: QueryAnalysis) -> str:
+        """Generate variation using synonyms."""
+        # Common synonym replacements
+        synonyms = {
+            'error': 'issue',
+            'problem': 'issue',
+            'fix': 'resolve',
+            'use': 'utilize',
+            'create': 'generate',
+            'make': 'create',
+            'get': 'retrieve',
+            'show': 'display',
+            'find': 'locate',
+            'explain': 'describe',
+        }
+        words = query.lower().split()
+        for i, word in enumerate(words):
+            if word in synonyms:
+                words[i] = synonyms[word]
+                break  # Only replace one word to keep natural
+        return ' '.join(words).capitalize()
+    def _expanded_variation(self, query: str, analysis: QueryAnalysis) -> str:
+        """Generate expanded version with more detail."""
+        if analysis.intent == 'definition':
+            return f"Detailed explanation and definition of {' '.join(analysis.keywords)}"
+        elif analysis.intent == 'how_to':
+            return f"Step-by-step guide on {query.lower()}"
+        elif analysis.intent == 'comparison':
+            return f"Comprehensive comparison: {query}"
+        else:
+            # Add qualifying words
+            return f"Detailed information about {query.lower()}"
+    def _simplified_variation(self, query: str, analysis: QueryAnalysis) -> str:
+        """Generate simplified version focusing on core concepts."""
+        # Use just the keywords
+        if len(analysis.keywords) >= 2:
+            return ' '.join(analysis.keywords[:3])
+        return query
+    def _keyword_focused(self, query: str, analysis: QueryAnalysis) -> str:
+        """Create keyword-focused variation for BM25."""
+        keywords = analysis.keywords + analysis.entities
+        return ' '.join(keywords[:5])
+    def _context_variation(self, query: str, analysis: QueryAnalysis) -> str:
+        """Add contextual information if domain detected."""
+        if analysis.domain:
+            return f"{query} in {analysis.domain} context"
+        return query
+class MultiQueryExpander:
+    """
+    Main query expansion orchestrator that combines analysis and rephrasing.
+    """
+    def __init__(self, llm=None):
+        """
+        Initialize MultiQueryExpander.
+        Args:
+            llm: LangChain LLM for advanced expansions
+        """
+        self.analyzer = QueryAnalyzer(llm)
+        self.rephraser = QueryRephraser(llm)
+    def expand(
+        self,
+        query: str,
+        strategy: QueryStrategy = QueryStrategy.BALANCED,
+        max_queries: int = 6
+    ) -> ExpandedQuery:
+        """
+        Expand query into multiple variations.
+        Args:
+            query: Original user query
+            strategy: Expansion strategy
+            max_queries: Maximum number of queries to generate
+        Returns:
+            ExpandedQuery object with all variations
+        """
+        # Analyze query
+        analysis = self.analyzer.analyze(query)
+        # Generate variations
+        variations = self.rephraser.generate_variations(query, analysis, strategy)
+        # Limit to max_queries
+        variations = variations[:max_queries]
+        return ExpandedQuery(
+            original=query,
+            variations=variations,
+            strategy_used=strategy,
+            analysis=analysis
+        )
+class MultiHopReasoner:
+    """
+    Implements multi-hop reasoning to connect concepts across documents.
+    Useful for complex queries that require information from multiple sources.
+    """
+    def __init__(self, llm=None):
+        """
+        Initialize MultiHopReasoner.
+        Args:
+            llm: LangChain LLM for reasoning
+        """
+        self.llm = llm
+    def generate_sub_queries(self, query: str, analysis: QueryAnalysis) -> List[str]:
+        """
+        Break complex query into sub-queries for multi-hop reasoning.
+        Args:
+            query: Original complex query
+            analysis: Query analysis
+        Returns:
+            List of sub-queries
+        """
+        sub_queries = [query]
+        # For comparison queries, create separate queries for each entity
+        if analysis.intent == 'comparison' and len(analysis.entities) >= 2:
+            for entity in analysis.entities[:2]:
+                sub_queries.append(f"Information about {entity}")
+        elif analysis.intent == 'comparison' and len(analysis.keywords) >= 2:
+            # Fallback: use keywords if no entities found
+            for keyword in analysis.keywords[:2]:
+                sub_queries.append(f"Information about {keyword}")
+        # For how-to queries, break into steps
+        if analysis.intent == 'how_to' and len(analysis.keywords) >= 2:
+            main_topic = ' '.join(analysis.keywords[:2])
+            sub_queries.append(f"Prerequisites for {main_topic}")
+            sub_queries.append(f"Steps to {main_topic}")
+        # For complex questions, create focused sub-queries
+        if analysis.complexity == 'complex' and len(analysis.keywords) > 3:
+            # Create queries focusing on different keyword groups
+            mid = len(analysis.keywords) // 2
+            sub_queries.append(' '.join(analysis.keywords[:mid]))
+            sub_queries.append(' '.join(analysis.keywords[mid:]))
+        return sub_queries[:5]  # Limit to 5 sub-queries
+class FallbackStrategies:
+    """
+    Implements fallback strategies for queries that don't retrieve good results.
+    """
+    @staticmethod
+    def simplify_query(query: str) -> str:
+        """Simplify query by removing modifiers and focusing on core terms."""
+        # Remove question words
+        query = re.sub(r'\b(what|how|why|when|where|which|who|can|could|should|would)\b', '', query, flags=re.IGNORECASE)
+        # Remove common phrases
+        query = re.sub(r'\b(is|are|was|were|be|been|the|a|an)\b', '', query, flags=re.IGNORECASE)
+        # Clean up extra spaces
+        query = re.sub(r'\s+', ' ', query).strip()
+        return query
+    @staticmethod
+    def broaden_query(query: str, analysis: QueryAnalysis) -> str:
+        """Broaden query to increase recall."""
+        # Remove specific constraints
+        query = re.sub(r'\b(specific|exactly|precisely|only|just)\b', '', query, flags=re.IGNORECASE)
+        # Add general terms
+        if analysis.keywords:
+            return f"{analysis.keywords[0]} overview"
+        return query
+    @staticmethod
+    def focus_entities(analysis: QueryAnalysis) -> str:
+        """Create entity-focused query as fallback."""
+        if analysis.entities:
+            return ' '.join(analysis.entities)
+        elif analysis.keywords:
+            return ' '.join(analysis.keywords[:3])
+        return ""
+# Convenience function for easy integration
+def expand_query_simple(
+    query: str,
+    strategy: str = "balanced",
+    llm=None
+) -> List[str]:
+    """
+    Simple function to expand a query without dealing with classes.
+    Args:
+        query: User's query to expand
+        strategy: "quick", "balanced", or "comprehensive"
+        llm: Optional LangChain LLM
+    Returns:
+        List of expanded query variations
+    Example:
+        >>> queries = expand_query_simple("How do I debug Python code?", strategy="balanced")
+        >>> print(queries)
+        ['How do I debug Python code?', 'How do I resolve Python code?', ...]
+    """
+    expander = MultiQueryExpander(llm=llm)
+    strategy_enum = QueryStrategy(strategy)
+    expanded = expander.expand(query, strategy=strategy_enum)
+    return expanded.variations
+# Example usage and testing
+if __name__ == "__main__":
+    # Example 1: Simple query expansion
+    print("=" * 60)
+    print("Example 1: Simple Query Expansion")
+    print("=" * 60)
+    query = "What is machine learning?"
+    queries = expand_query_simple(query, strategy="balanced")
+    print(f"\nOriginal: {query}")
+    print(f"\nExpanded queries ({len(queries)}):")
+    for i, q in enumerate(queries, 1):
+        print(f"  {i}. {q}")
+    # Example 2: Complex query with full analysis
+    print("\n" + "=" * 60)
+    print("Example 2: Complex Query with Analysis")
+    print("=" * 60)
+    expander = MultiQueryExpander()
+    query = "How do I compare the performance of different neural network architectures?"
+    result = expander.expand(query, strategy=QueryStrategy.COMPREHENSIVE)
+    print(f"\nOriginal: {result.original}")
+    print(f"\nAnalysis:")
+    print(f"  Intent: {result.analysis.intent}")
+    print(f"  Entities: {result.analysis.entities}")
+    print(f"  Keywords: {result.analysis.keywords}")
+    print(f"  Complexity: {result.analysis.complexity}")
+    print(f"  Domain: {result.analysis.domain}")
+    print(f"\nExpanded queries ({len(result.variations)}):")
+    for i, q in enumerate(result.variations, 1):
+        print(f"  {i}. {q}")
+    # Example 3: Multi-hop reasoning
+    print("\n" + "=" * 60)
+    print("Example 3: Multi-Hop Reasoning")
+    print("=" * 60)
+    reasoner = MultiHopReasoner()
+    analyzer = QueryAnalyzer()
+    query = "Compare Python and Java for web development"
+    analysis = analyzer.analyze(query)
+    sub_queries = reasoner.generate_sub_queries(query, analysis)
+    print(f"\nOriginal: {query}")
+    print(f"\nSub-queries for multi-hop reasoning:")
+    for i, sq in enumerate(sub_queries, 1):
+        print(f"  {i}. {sq}")
+    # Example 4: Fallback strategies
+    print("\n" + "=" * 60)
+    print("Example 4: Fallback Strategies")
+    print("=" * 60)
+    query = "What is the specific difference between supervised and unsupervised learning?"
+    analysis = analyzer.analyze(query)

rag_processor.py CHANGED Viewed

@@ -6,109 +6,413 @@ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
 from langchain_core.runnables import RunnableParallel, RunnablePassthrough
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables.history import RunnableWithMessageHistory
-def create_rag_chain(retriever, get_session_history_func):
-    """
-    Creates an advanced Retrieval-Augmented Generation (RAG) chain with hybrid search,
-    query rewriting, answer refinement, and conversational memory.
-    Args:
-        retriever: A configured LangChain retriever object.
-        get_session_history_func: A function to get the chat history for a session.
-    Returns:
-        A LangChain runnable object representing the RAG chain with memory.
-    Raises:
-        ValueError: If the GROQ_API_KEY is missing.
-    """
-    load_dotenv()
-    api_key = os.getenv("GROQ_API_KEY")
-    print("key loaded")
-    if not api_key or api_key == "your_groq_api_key_here":
-        error_msg = "GROQ_API_KEY not found or not configured properly.\n"
-        print("Not found key")
-        if os.getenv("SPACE_ID") or os.getenv("SPACES_ZERO_GPU"):
-            error_msg += (
-                "For Hugging Face Spaces: Set GROQ_API_KEY in your Space's Settings > Repository Secrets.\n"
-                "Go to your Space settings and add GROQ_API_KEY as a secret variable."
             )
-        else:
-            error_msg += (
-                "For local development: Set your GROQ API key in the .env file.\n"
-                "Copy .env.example to .env and add your actual API key.\n"
-                "Get your API key from: https://console.groq.com/keys"
-            )
-        raise ValueError(error_msg)
-    llm = ChatGroq(model_name="llama-3.1-8b-instant", api_key=api_key, temperature=0.2)
-    print("\nSetting up query rewriting chain...")
-    rewrite_template = """You are an expert at rewriting user questions for a vector database.
-You are here to help the user with their document.
-Based on the chat history, reformulate the follow-up question to be a standalone question.
-This new query should be optimized to find the most relevant documents in a knowledge base.
-Do NOT answer the question, only provide the rewritten, optimized question.
 Chat History:
 {chat_history}
 Follow-up Question: {question}
-Standalone Question:"""
     rewrite_prompt = ChatPromptTemplate.from_messages([
         ("system", rewrite_template),
         MessagesPlaceholder(variable_name="chat_history"),
-        ("human", "Based on our conversation, reformulate this question to be a standalone query: {question}")
     ])
     query_rewriter = rewrite_prompt | llm | StrOutputParser()
-    print("\nSetting up main RAG chain...")
-    rag_template = """You are CogniChat, an expert document analysis assistant.
-IMPORTANT RULES:
-1. ONLY use information from the provided context
-2. If the answer isn't in the context, say "I cannot find this information in the uploaded documents"
-3. Cite specific sections when possible (e.g., "According to Section 2.3...")
-4. Be concise but comprehensive
-Context:
-{context}"""
     rag_prompt = ChatPromptTemplate.from_messages([
         ("system", rag_template),
         MessagesPlaceholder(variable_name="chat_history"),
         ("human", "{question}"),
     ])
-    setup_and_retrieval = RunnableParallel({
-    "context": RunnablePassthrough.assign(
-        rewritten_question=query_rewriter
-    ) | (lambda x: x["rewritten_question"]) | retriever,
-    "question": itemgetter("question"),
-    "chat_history": itemgetter("chat_history")})
-    conversational_rag_chain = (
-        setup_and_retrieval
-        | rag_prompt
-        | llm
-        | StrOutputParser()
-    )
     chain_with_memory = RunnableWithMessageHistory(
         conversational_rag_chain,
         get_session_history_func,
         input_messages_key="question",
         history_messages_key="chat_history",
     )
-    print("\nSetting up answer refinement chain...")
-    refine_template = """You are an expert at editing and refining content.
-Your task is to take a given answer and improve its clarity, structure, and readability.
-Use formatting such as bold text, bullet points, or numbered lists where it enhances the explanation.
-Do not add any new information that wasn't in the original answer.
-Original Answer:
-{answer}
-Refined Answer:"""
-    refine_prompt = ChatPromptTemplate.from_template(refine_template)
-    refinement_chain = refine_prompt | llm | StrOutputParser()
-    final_chain = (
-        lambda input_dict: {"answer": chain_with_memory.invoke(input_dict, config=input_dict.get('config'))}
-    ) | refinement_chain
-    print("\nFinalizing the complete chain with memory...")
-    return final_chain

 from langchain_core.runnables import RunnableParallel, RunnablePassthrough
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables.history import RunnableWithMessageHistory
+from langchain_core.documents import Document
+from query_expansion import expand_query_simple
+from typing import List, Optional
+import time
+class GroqAPIKeyManager:
+    def __init__(self, api_keys: List[str]):
+        self.api_keys = [key for key in api_keys if key and key != "your_groq_api_key_here"]
+        if not self.api_keys:
+            raise ValueError("No valid API keys provided!")
+        self.current_index = 0
+        self.failed_keys = set()
+        self.success_count = {key: 0 for key in self.api_keys}
+        self.failure_count = {key: 0 for key in self.api_keys}
+        print(f"API Key Manager: Loaded {len(self.api_keys)} API keys")
+    def get_current_key(self) -> str:
+        return self.api_keys[self.current_index]
+    def mark_success(self, api_key: str):
+        if api_key in self.success_count:
+            self.success_count[api_key] += 1
+            if api_key in self.failed_keys:
+                self.failed_keys.remove(api_key)
+                print(f"API Key #{self.api_keys.index(api_key) + 1} recovered!")
+    def mark_failure(self, api_key: str):
+        if api_key in self.failure_count:
+            self.failure_count[api_key] += 1
+            self.failed_keys.add(api_key)
+    def rotate_to_next_key(self) -> bool:
+        initial_index = self.current_index
+        attempts = 0
+        while attempts < len(self.api_keys):
+            self.current_index = (self.current_index + 1) % len(self.api_keys)
+            attempts += 1
+            current_key = self.api_keys[self.current_index]
+            if attempts >= len(self.api_keys):
+                print(f"All keys attempted, retrying with key #{self.current_index + 1}")
+                return True
+            if current_key not in self.failed_keys:
+                print(f"Switching to API Key #{self.current_index + 1}")
+                return True
+        return False
+    def get_statistics(self) -> str:
+        stats = []
+        for i, key in enumerate(self.api_keys):
+            success = self.success_count[key]
+            failure = self.failure_count[key]
+            status = "FAILED" if key in self.failed_keys else "ACTIVE"
+            masked_key = key[:8] + "..." + key[-4:] if len(key) > 12 else "***"
+            stats.append(f"   Key #{i+1} ({masked_key}): {success} success, {failure} failures [{status}]")
+        return "\n".join(stats)
+def load_api_keys_from_hf_secrets() -> List[str]:
+    api_keys = []
+    secret_names = ['GROQ_API_KEY_1', 'GROQ_API_KEY_2', 'GROQ_API_KEY_3', 'GROQ_API_KEY_4']
+    print("Loading API keys from Hugging Face Secrets...")
+    for secret_name in secret_names:
+        try:
+            api_key = os.getenv(secret_name)
+            if api_key and api_key.strip() and api_key != "your_groq_api_key_here":
+                api_keys.append(api_key.strip())
+                print(f" Loaded: {secret_name}")
+            else:
+                print(f" Not found or empty: {secret_name}")
+        except Exception as e:
+            print(f" Error loading {secret_name}: {str(e)}")
+    return api_keys
+def create_llm_with_fallback(
+    api_key_manager: GroqAPIKeyManager,
+    model_name: str,
+    temperature: float,
+    max_retries: int = 3
+) -> ChatGroq:
+    """Build a ``ChatGroq`` client, rotating to another API key when one fails.
+
+    Each attempt constructs a client with the manager's current key and sends
+    a one-word probe request. On success the key is marked successful and the
+    client is returned. On any exception the key is marked failed, the error
+    is logged, and the manager is asked to rotate to the next key.
+
+    Args:
+        api_key_manager: Supplies the current key and records the outcome.
+        model_name: Model identifier passed to ``ChatGroq``.
+        temperature: Sampling temperature passed to ``ChatGroq``.
+        max_retries: Maximum number of keys to try.
+
+    Returns:
+        The first client whose probe request succeeded.
+
+    Raises:
+        ValueError: When rotation reports no usable key, when ``max_retries``
+            attempts have all failed, or when ``max_retries`` is not positive.
+            The triggering exception is attached as ``__cause__``.
+    """
+    for attempt in range(max_retries):
+        current_key = api_key_manager.get_current_key()
+        # Capture the label before recording the outcome: mark_failure() is
+        # not visible here and may move current_index, which would make the
+        # log lines below name a different key than the one that failed.
+        key_number = api_key_manager.current_index + 1
+        try:
+            llm = ChatGroq(
+                model_name=model_name,
+                api_key=current_key,
+                temperature=temperature
             )
+            # Probe request: only whether it raises matters, not the reply.
+            llm.invoke("test")
+            api_key_manager.mark_success(current_key)
+            return llm
+        except Exception as e:
+            error_msg = str(e).lower()
+            api_key_manager.mark_failure(current_key)
+            # NOTE(review): substring matching is only a heuristic for the log
+            # label; "api" in particular matches many unrelated messages.
+            if "rate" in error_msg or "limit" in error_msg:
+                print(f"  Rate limit hit on API Key #{key_number}")
+            elif "auth" in error_msg or "api" in error_msg:
+                print(f"  Authentication failed on API Key #{key_number}")
+            else:
+                print(f"  Error with API Key #{key_number}: {str(e)[:50]}")
+            if attempt < max_retries - 1:
+                if api_key_manager.rotate_to_next_key():
+                    print(f" Retrying with next API key (Attempt {attempt + 2}/{max_retries})...")
+                    time.sleep(1)
+                else:
+                    raise ValueError("All API keys failed!") from e
+            else:
+                raise ValueError(f"Failed to initialize LLM after {max_retries} attempts") from e
+    # Only reachable when the loop body never ran (max_retries <= 0).
+    raise ValueError("Failed to create LLM with any available API key")
+def create_multi_query_retriever(base_retriever, llm, strategy: str = "balanced"):
+    """Wrap a retriever so each query is expanded into several variations.
+
+    Args:
+        base_retriever: Object whose ``invoke(query)`` returns documents.
+        llm: Language model handed to ``expand_query_simple``.
+        strategy: Expansion strategy name handed to ``expand_query_simple``.
+
+    Returns:
+        A function ``multi_query_retrieve(query)`` that runs every variation
+        through ``base_retriever`` and returns the combined documents, in
+        first-seen order, with repeated ``page_content`` removed.
+    """
+    def multi_query_retrieve(query: str) -> List[Document]:
+        query_variations = expand_query_simple(query, strategy=strategy, llm=llm)
+        all_docs = []
+        # Key on the text itself rather than hash(text): two different chunks
+        # can share a hash value, and one of them would then be dropped.
+        seen_content = set()
+        for i, query_var in enumerate(query_variations):
+            try:
+                docs = base_retriever.invoke(query_var)
+                for doc in docs:
+                    content = doc.page_content
+                    if content not in seen_content:
+                        seen_content.add(content)
+                        all_docs.append(doc)
+            except Exception as e:
+                # Best effort: one failing variation must not lose the
+                # documents already collected from the others.
+                print(f" Query Expansion Error (Query {i+1}): {str(e)[:50]}")
+                continue
+        print(f" Query Expansion: Retrieved {len(all_docs)} unique documents.")
+        return all_docs
+    return multi_query_retrieve
+def get_system_prompt(temperature: float) -> str:
+    """Return the system-prompt template that matches a sampling temperature.
+
+    Three tiers are defined: ``temperature <= 0.4`` selects a factual,
+    heavily structured prompt; ``temperature <= 0.8`` selects a balanced,
+    engaging prompt; anything higher selects a creative-but-clear prompt.
+    Every template contains a literal ``{context}`` placeholder that the
+    caller is expected to fill with the retrieved document text.
+    """
+    # Tier 1: factual and structured, with strict citation and accuracy rules.
+    if temperature <= 0.4:
+        return """You are CogniChat, an expert document analysis assistant specializing in comprehensive and well-structured answers.
+RESPONSE GUIDELINES:
+**Structure & Formatting:**
+- Start with a direct answer to the question
+- Use **bold** for key terms, important concepts, and technical terminology
+- Use bullet points (•) for lists, features, or multiple items
+- Use numbered lists (1., 2., 3.) for steps, procedures, or sequential information
+- Use ### Headers to organize different sections or topics
+- Add blank lines between sections for readability
+**Source Citation:**
+- Always cite information using: [Source: filename, Page: X] and cite it at the end of the entire answer only
+- Place citations at the end of your final answer only
+- Do not cite sources within the body of your answer
+- Multiple sources: [Source: doc1.pdf, Page: 3; doc2.pdf, Page: 7]
+**Completeness:**
+- Provide thorough, detailed answers using ALL relevant information from context
+- Summarize and properly elaborate each point for increased clarity
+- If the question has multiple parts, address each part clearly
+**Accuracy:**
+- ONLY use information from the provided context documents below
+- If information is incomplete, state what IS available and what ISN'T
+- If the answer isn't in the context, clearly state: "I cannot find this information in the uploaded documents"
+- Never make assumptions or add information not in the context
+---
+{context}
+---
+Now answer the following question comprehensively using the context above:"""
+    # Tier 2: still grounded in the context, but with a more conversational tone.
+    elif temperature <= 0.8:
+        return """You are CogniChat, an intelligent document analysis assistant that combines accuracy with engaging communication.
+RESPONSE GUIDELINES:
+**Communication Style:**
+- Present information in a clear, engaging manner
+- Use **bold** for emphasis on important concepts
+- Balance structure with natural flow
+- Make complex topics accessible and interesting
+**Content Approach:**
+- Ground your response firmly in the provided context
+- Add helpful explanations and connections between concepts
+- Use analogies or examples when they help clarify ideas (but keep them brief)
+- Organize information logically with headers (###) and lists where appropriate
+**Source Attribution:**
+- Cite sources at the end: [Source: filename, Page: X]
+- Be transparent about what the documents do and don't contain
+**Accuracy:**
+- Base your answer on the context documents provided
+- If information is partial, explain what's available
+- Acknowledge gaps: "The documents don't cover this aspect"
+---
+{context}
+---
+Now answer the following question in an engaging yet accurate way:"""
+    else:  # temperature > 0.8
+        # Tier 3: creative presentation, but the prompt puts clarity first and
+        # still forbids inventing facts that are not in the documents.
+        return """You are CogniChat, a creative document analyst who makes complex information clear, memorable, and engaging.
+ YOUR CORE MISSION: **CLARITY FIRST, CREATIVITY SECOND**
+Make information easier to understand, not harder. Your creativity should illuminate, not obscure.
+**CREATIVE CLARITY PRINCIPLES:**
+1. **Simplify, Don't Complicate**
+   - Break down complex concepts into simple, digestible parts
+   - Use everyday language alongside technical terms
+   - Explain jargon immediately in plain English
+   - Short sentences for complex ideas, varied length for rhythm
+2. **Smart Use of Examples & Analogies** (Use Sparingly!)
+   - Only use analogies when they genuinely make something clearer
+   - Keep analogies simple and relatable (everyday objects/experiences)
+   - Never use metaphors that require explanation themselves
+   - If you can explain it directly in simple terms, do that instead
+3. **Engaging Structure**
+   - Start with the core answer in one clear sentence
+   - Use **bold** to highlight key takeaways
+   - Break information into logical chunks with ### headers
+   - Use bullet points for clarity, not decoration
+   - Add brief transition phrases to connect ideas smoothly
+4. **Conversational Yet Precise**
+   - Write like you're explaining to a smart friend
+   - Use "you" and active voice to engage readers
+   - Ask occasional rhetorical questions only if they aid understanding
+   - Vary sentence length to maintain interest
+   - Use emojis sparingly (1-2 max) and only where they add clarity
+5. **Visual Clarity**
+   - Strategic use of formatting: **bold** for key terms, *italics* for emphasis
+   - White space between sections for easy scanning
+   - Progressive disclosure: simple concepts first, details after
+   - Numbered lists for sequences, bullets for related items
+**WHAT TO AVOID:**
+-  Flowery or overly descriptive language
+-  Complex metaphors that need their own explanation
+-  Long narrative storytelling that buries the facts
+-  Multiple rhetorical questions in a row
+-  Overuse of emojis or exclamation points
+-  Making simple things sound complicated
+**ACCURACY BOUNDARIES:**
+-  Creative explanation and presentation of facts
+-  Simple, helpful examples from common knowledge
+-  Reorganizing information for better understanding
+-  Never invent facts not in the documents
+-  Don't contradict source material
+-  If info is missing, say so clearly and briefly
+**Source Attribution:**
+- End with: [Source: filename, Page: X]
+- Keep it simple and clear
+---
+{context}
+---
+Now, explain the answer clearly and engagingly. Remember: if your grandmother couldn't understand it, simplify more:"""
+def create_rag_chain(
+    retriever,
+    get_session_history_func,
+    enable_query_expansion=True,
+    expansion_strategy="balanced",
+    model_name: str = "moonshotai/kimi-k2-instruct",
+    temperature: float = 0.2,
+    api_keys: Optional[List[str]] = None
+):
+    """Assemble the conversational RAG chain and its API-key manager.
+
+    The chain rewrites the incoming question into a standalone query using
+    the chat history, retrieves documents for it (optionally through
+    multi-query expansion), formats them into a context block, and answers
+    with a system prompt chosen by ``temperature``.
+
+    Args:
+        retriever: Base retriever used to fetch documents.
+        get_session_history_func: Callable handed to
+            ``RunnableWithMessageHistory`` to look up a session's history.
+        enable_query_expansion: Wrap ``retriever`` with
+            ``create_multi_query_retriever`` when true.
+        expansion_strategy: Strategy name forwarded to the expansion wrapper.
+        model_name: Model identifier for the LLM.
+        temperature: Sampling temperature; also selects the system prompt.
+        api_keys: Keys to use. When ``None`` they are loaded with
+            ``load_api_keys_from_hf_secrets``.
+
+    Returns:
+        A tuple ``(chain_with_memory, api_key_manager)``.
+
+    Raises:
+        ValueError: When no API key is available, or when no LLM could be
+            initialised with any of them.
+    """
+    if api_keys is None:
+        api_keys = load_api_keys_from_hf_secrets()
+    if not api_keys:
+        raise ValueError(
+            "No valid API keys found! Please set GROQ_API_KEY or GROQ_API_KEY_1, "
+            "GROQ_API_KEY_2, GROQ_API_KEY_3, GROQ_API_KEY_4 in your .env file"
+        )
+    api_key_manager = GroqAPIKeyManager(api_keys)
+    print(f" RAG: Initializing LLM - Model: {model_name}, Temp: {temperature}")
+    # Log label only; the thresholds mirror the tiers in get_system_prompt().
+    if temperature <= 0.4:
+        creativity_mode = "FACTUAL & STRUCTURED"
+    elif temperature <= 0.8:
+        creativity_mode = "BALANCED & ENGAGING"
+    else:
+        creativity_mode = "CREATIVE & STORYTELLING"
+    print(f"Creativity Mode: {creativity_mode}")
+    llm = create_llm_with_fallback(api_key_manager, model_name, temperature)
+    print(f"LLM initialized with API Key #{api_key_manager.current_index + 1}")
+    if enable_query_expansion:
+        print(f"RAG: Query Expansion ENABLED (Strategy: {expansion_strategy})")
+        enhanced_retriever = create_multi_query_retriever(
+            base_retriever=retriever,
+            llm=llm,
+            strategy=expansion_strategy
+        )
+    else:
+        enhanced_retriever = retriever
+    # NOTE(review): this template interpolates {chat_history} and {question}
+    # as text, and the prompt below also supplies both as messages, so the
+    # rewriter appears to receive each twice - confirm this is intended.
+    rewrite_template = """You are an expert at optimizing search queries for document retrieval.
+Given the conversation history and a follow-up question, create a comprehensive standalone question that:
+1. Incorporates all relevant context from the chat history
+2. Expands abbreviations and resolves all pronouns (it, they, this, that, etc.)
+3. Includes key technical terms and concepts that would help find relevant documents
+4. Maintains the original intent, specificity, and detail level
+5. If the question asks for comparison or multiple items, ensure all items are in the query
 Chat History:
 {chat_history}
 Follow-up Question: {question}
+Optimized Standalone Question:"""
     rewrite_prompt = ChatPromptTemplate.from_messages([
         ("system", rewrite_template),
         MessagesPlaceholder(variable_name="chat_history"),
+        ("human", "{question}")
     ])
     query_rewriter = rewrite_prompt | llm | StrOutputParser()
+    def format_docs(docs):
+        """Render retrieved documents as one labelled context string."""
+        if not docs:
+            return "No relevant documents found in the knowledge base."
+        formatted_parts = []
+        for i, doc in enumerate(docs, 1):
+            source = doc.metadata.get('source', 'Unknown Document')
+            page = doc.metadata.get('page', 'N/A')
+            rerank_score = doc.metadata.get('rerank_score')
+            content = doc.page_content.strip()
+            doc_header = f"{'='*60}\nDOCUMENT {i}\n{'='*60}"
+            metadata_line = f"Source: {source} | Page: {page}"
+            # Test for presence, not truthiness: a score of exactly 0.0 is a
+            # real value and must still be shown.
+            if rerank_score is not None:
+                metadata_line += f" | Relevance: {rerank_score:.3f}"
+            formatted_parts.append(
+                f"{doc_header}\n"
+                f"{metadata_line}\n"
+                f"{'-'*60}\n"
+                f"{content}\n"
+            )
+        return f"RETRIEVED CONTEXT ({len(docs)} documents):\n\n" + "\n".join(formatted_parts)
+    rag_template = get_system_prompt(temperature)
     rag_prompt = ChatPromptTemplate.from_messages([
         ("system", rag_template),
         MessagesPlaceholder(variable_name="chat_history"),
         ("human", "{question}"),
     ])
+    # Retrieval branch: pick out the rewriter's inputs, rewrite the question,
+    # retrieve with the rewritten query, then format the hits as context.
+    rewriter_input = RunnableParallel({
+        "question": itemgetter("question"),
+        "chat_history": itemgetter("chat_history"),
+    })
+    retrieval_chain = rewriter_input | query_rewriter | enhanced_retriever | format_docs
+    # The answer prompt receives the original question, not the rewritten one.
+    conversational_rag_chain = RunnableParallel({
+        "context": retrieval_chain,
+        "question": itemgetter("question"),
+        "chat_history": itemgetter("chat_history"),
+    }) | rag_prompt | llm | StrOutputParser()
     chain_with_memory = RunnableWithMessageHistory(
         conversational_rag_chain,
         get_session_history_func,
         input_messages_key="question",
         history_messages_key="chat_history",
     )
+    print("RAG: Chain created successfully.")
+    print("\n" + api_key_manager.get_statistics())
+    return chain_with_memory, api_key_manager

templates/index.html CHANGED Viewed

@@ -1,615 +1,885 @@
 <!DOCTYPE html>
-<html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>CogniChat - Chat with your Documents</title>
-    <script src="https://cdn.tailwindcss.com"></script>
-    <link rel="preconnect" href="https://fonts.googleapis.com">
-    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
-    <link href="https://fonts.googleapis.com/css2?family=Google+Sans:wght@400;500;700&family=Roboto:wght@400;500&display=swap" rel="stylesheet">
-    <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
-    <style>
-        :root {
-            --background: #f0f4f9;
-            --foreground: #1f1f1f;
-            --primary: #1a73e8;
-            --primary-hover: #1867cf;
-            --card: #ffffff;
-            --card-border: #dadce0;
-            --input-bg: #e8f0fe;
-            --user-bubble: #d9e7ff;
-            --bot-bubble: #f1f3f4;
-        }
-        /* Dark mode styles */
-        .dark {
-            --background: #202124;
-            --foreground: #e8eaed;
-            --primary: #8ab4f8;
-            --primary-hover: #99bdfa;
-            --card: #303134;
-            --card-border: #5f6368;
-            --input-bg: #303134;
-            --user-bubble: #3c4043;
-            --bot-bubble: #3c4043;
-        }
-        body {
-            font-family: 'Google Sans', 'Roboto', sans-serif;
-            background-color: var(--background);
-            color: var(--foreground);
-            overflow: hidden;
-        }
-        #chat-window::-webkit-scrollbar { width: 8px; }
-        #chat-window::-webkit-scrollbar-track { background: transparent; }
-        #chat-window::-webkit-scrollbar-thumb { background-color: #bdc1c6; border-radius: 20px; }
-        .dark #chat-window::-webkit-scrollbar-thumb { background-color: #5f6368; }
-        .drop-zone--over {
-            border-color: var(--primary);
-            box-shadow: 0 0 15px rgba(26, 115, 232, 0.3);
-        }
-        /* Loading Spinner */
-        .loader {
-            width: 48px;
-            height: 48px;
-            border: 3px solid var(--card-border);
-            border-radius: 50%;
-            display: inline-block;
-            position: relative;
-            box-sizing: border-box;
-            animation: rotation 1s linear infinite;
-        }
-        .loader::after {
-            content: '';
-            box-sizing: border-box;
             position: absolute;
-            left: 50%;
-            top: 50%;
-            transform: translate(-50%, -50%);
-            width: 56px;
-            height: 56px;
-            border-radius: 50%;
-            border: 3px solid;
-            border-color: var(--primary) transparent;
-        }
-        @keyframes rotation {
-            0% { transform: rotate(0deg); }
-            100% { transform: rotate(360deg); }
-        }
-        /* Typing Indicator Animation */
-        .typing-indicator span {
-            height: 10px;
-            width: 10px;
-            background-color: #9E9E9E;
-            border-radius: 50%;
-            display: inline-block;
-            animation: bounce 1.4s infinite ease-in-out both;
-        }
-        .typing-indicator span:nth-child(1) { animation-delay: -0.32s; }
-        .typing-indicator span:nth-child(2) { animation-delay: -0.16s; }
-        @keyframes bounce {
-            0%, 80%, 100% { transform: scale(0); }
-            40% { transform: scale(1.0); }
-        }
-        /* Enhanced Markdown Styling for better readability and aesthetics */
-        .markdown-content p {
-            margin-bottom: 1rem;
-            line-height: 1.75;
-        }
-        .markdown-content h1, .markdown-content h2, .markdown-content h3, .markdown-content h4 {
-            font-family: 'Google Sans', sans-serif;
-            font-weight: 700;
-            margin-top: 1.75rem;
-            margin-bottom: 1rem;
-            line-height: 1.3;
-        }
-        .markdown-content h1 { font-size: 1.75em; border-bottom: 1px solid var(--card-border); padding-bottom: 0.5rem; }
-        .markdown-content h2 { font-size: 1.5em; }
-        .markdown-content h3 { font-size: 1.25em; }
-        .markdown-content h4 { font-size: 1.1em; }
-        .markdown-content ul, .markdown-content ol {
-            padding-left: 1.75rem;
-            margin-bottom: 1rem;
-        }
-        .markdown-content li {
-            margin-bottom: 0.5rem;
-        }
-        .dark .markdown-content ul > li::marker { color: var(--primary); }
-        .markdown-content ul > li::marker { color: var(--primary); }
-        .markdown-content a {
-            color: var(--primary);
-            text-decoration: none;
-            font-weight: 500;
-            border-bottom: 1px solid transparent;
-            transition: all 0.2s ease-in-out;
-        }
-        .markdown-content a:hover {
-            border-bottom-color: var(--primary-hover);
-        }
-        .markdown-content blockquote {
-            margin: 1.5rem 0;
-            padding-left: 1.5rem;
-            border-left: 4px solid var(--card-border);
-            color: #6c757d;
-            font-style: italic;
-        }
-        .dark .markdown-content blockquote {
-            color: #adb5bd;
-        }
-        .markdown-content hr {
-            border: none;
-            border-top: 1px solid var(--card-border);
-            margin: 2rem 0;
-        }
-        .markdown-content table {
-            width: 100%;
-            border-collapse: collapse;
-            margin: 1.5rem 0;
-            font-size: 0.9em;
-            box-shadow: 0 1px 3px rgba(0,0,0,0.05);
-            border-radius: 8px;
-            overflow: hidden;
-        }
-        .markdown-content th, .markdown-content td {
             border: 1px solid var(--card-border);
-            padding: 0.75rem 1rem;
-            text-align: left;
-        }
-        .markdown-content th {
-            background-color: var(--bot-bubble);
-            font-weight: 500;
-        }
-        .markdown-content code {
-            background-color: rgba(0,0,0,0.05);
-            padding: 0.2rem 0.4rem;
-            border-radius: 0.25rem;
-            font-family: 'Roboto Mono', monospace;
-            font-size: 0.9em;
-        }
-        .dark .markdown-content code {
-            background-color: rgba(255,255,255,0.1);
-        }
-        .markdown-content pre {
-            position: relative;
-            background-color: #f8f9fa;
-            border: 1px solid var(--card-border);
-            border-radius: 0.5rem;
-            margin-bottom: 1rem;
-        }
-        .dark .markdown-content pre {
-            background-color: #2e2f32;
-        }
-        .markdown-content pre code {
-            background: none;
-            padding: 1rem;
-            display: block;
-            overflow-x: auto;
-        }
-        .markdown-content pre .copy-code-btn {
-            position: absolute;
-            top: 0.5rem;
-            right: 0.5rem;
-            background-color: #e8eaed;
-            border: 1px solid #dadce0;
-            color: #5f6368;
-            padding: 0.3rem 0.6rem;
-            border-radius: 0.25rem;
-            cursor: pointer;
-            opacity: 0;
-            transition: opacity 0.2s;
             font-size: 0.8em;
-        }
-        .dark .markdown-content pre .copy-code-btn {
-            background-color: #3c4043;
-            border-color: #5f6368;
-            color: #e8eaed;
-        }
-        .markdown-content pre:hover .copy-code-btn {
-            opacity: 1;
-        }
-        /* Spinner for the TTS button */
-        .tts-button-loader {
-            width: 16px;
-            height: 16px;
-            border: 2px solid currentColor; /* Use button's text color */
-            border-radius: 50%;
-            display: inline-block;
-            box-sizing: border-box;
-            animation: rotation 0.8s linear infinite;
-            border-bottom-color: transparent; /* Makes it a half circle spinner */
-        }
-    </style>
-</head>
-<body class="w-screen h-screen dark">
-    <main id="main-content" class="h-full flex flex-col transition-opacity duration-500">
-        <div id="chat-container" class="hidden flex-1 flex flex-col w-full mx-auto overflow-hidden">
-            <header class="text-center p-4 border-b border-[var(--card-border)] flex-shrink-0">
-                <h1 class="text-xl font-medium">Chat with your Docs</h1>
-                <p id="chat-filename" class="text-xs text-gray-500 dark:text-gray-400 mt-1"></p>
-            </header>
-            <div id="chat-window" class="flex-1 overflow-y-auto p-4 md:p-6 lg:p-10">
-                <div id="chat-content" class="max-w-4xl mx-auto space-y-8">
-                </div>
-            </div>
-            <div class="p-4 flex-shrink-0 bg-[var(--background)] border-t border-[var(--card-border)]">
-                <form id="chat-form" class="max-w-4xl mx-auto bg-[var(--card)] rounded-full p-2 flex items-center shadow-sm border border-transparent focus-within:border-[var(--primary)] transition-colors">
-                    <input type="text" id="chat-input" placeholder="Ask a question about your documents..." class="flex-grow bg-transparent focus:outline-none px-4 text-sm" autocomplete="off">
-                    <button type="submit" id="chat-submit-btn" class="bg-[var(--primary)] hover:bg-[var(--primary-hover)] text-white p-2 rounded-full transition-all duration-200 disabled:opacity-50 disabled:cursor-not-allowed disabled:bg-gray-500" title="Send">
-                        <svg class="w-5 h-5" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M3.49941 11.5556L11.555 3.5L12.4438 4.38889L6.27721 10.5556H21.9994V11.5556H6.27721L12.4438 17.7222L11.555 18.6111L3.49941 10.5556V11.5556Z" transform="rotate(180, 12.7497, 11.0556)" fill="currentColor"></path></svg>
-                    </button>
-                </form>
-            </div>
-        </div>
-        <div id="upload-container" class="flex-1 flex flex-col items-center justify-center p-8 transition-opacity duration-300">
-            <div class="text-center">
-                <h1 class="text-5xl font-medium mb-4">Upload docs to chat</h1>
-                <div id="drop-zone" class="w-full max-w-lg text-center border-2 border-dashed border-[var(--card-border)] rounded-2xl p-10 transition-all duration-300 cursor-pointer bg-[var(--card)] hover:border-[var(--primary)]">
-                    <input id="file-upload" type="file" class="hidden" accept=".pdf,.txt,.docx,.jpg,.jpeg,.png" multiple title="input">
-                    <svg class="mx-auto h-12 w-12 text-gray-400" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" ><path stroke-linecap="round" stroke-linejoin="round" d="M12 16.5V9.75m0 0l3-3m-3 3l-3 3M6.75 19.5a4.5 4.5 0 01-1.41-8.775 5.25 5.25 0 0110.233-2.33 3 3 0 013.758 3.848A3.752 3.752 0 0118 19.5H6.75z"></path></svg>
-                    <p class="mt-4 text-sm font-medium">Drag & drop files or click to upload</p>
-                    <p id="file-name" class="mt-2 text-xs text-gray-500"></p>
-                </div>
-            </div>
-        </div>
-        <div id="loading-overlay" class="hidden fixed inset-0 bg-[var(--background)] bg-opacity-80 backdrop-blur-sm flex flex-col items-center justify-center z-50 text-center p-4">
-            <div class="loader"></div>
-            <p id="loading-text" class="mt-6 text-sm font-medium"></p>
-            <p id="loading-subtext" class="mt-2 text-xs text-gray-500 dark:text-gray-400"></p>
-        </div>
-    </main>
-    <script>
-        document.addEventListener('DOMContentLoaded', () => {
-            const uploadContainer = document.getElementById('upload-container');
-            const chatContainer = document.getElementById('chat-container');
-            const dropZone = document.getElementById('drop-zone');
-            const fileUploadInput = document.getElementById('file-upload');
-            const fileNameSpan = document.getElementById('file-name');
-            const loadingOverlay = document.getElementById('loading-overlay');
-            const loadingText = document.getElementById('loading-text');
-            const loadingSubtext = document.getElementById('loading-subtext');
-            const chatForm = document.getElementById('chat-form');
-            const chatInput = document.getElementById('chat-input');
-            const chatSubmitBtn = document.getElementById('chat-submit-btn');
-            const chatWindow = document.getElementById('chat-window');
-            const chatContent = document.getElementById('chat-content');
-            const chatFilename = document.getElementById('chat-filename');
-            let sessionId = null;
-            const storedSessionId = sessionStorage.getItem('cognichat_session_id');
-            if (storedSessionId) {
-                sessionId = storedSessionId;
-                console.debug('Restored session ID from storage:', sessionId);
-            }
-            // --- File Upload Logic ---
-            dropZone.addEventListener('click', () => fileUploadInput.click());
-            ['dragenter', 'dragover', 'dragleave', 'drop'].forEach(eventName => {
-                dropZone.addEventListener(eventName, preventDefaults, false);
-                document.body.addEventListener(eventName, preventDefaults, false);
-            });
-            ['dragenter', 'dragover'].forEach(eventName => {
-                dropZone.addEventListener(eventName, () => dropZone.classList.add('drop-zone--over'));
-            });
-            ['dragleave', 'drop'].forEach(eventName => {
-                dropZone.addEventListener(eventName, () => dropZone.classList.remove('drop-zone--over'));
-            });
-            dropZone.addEventListener('drop', (e) => {
-                const files = e.dataTransfer.files;
-                if (files.length > 0) handleFiles(files);
-            });
-            fileUploadInput.addEventListener('change', (e) => {
-                if (e.target.files.length > 0) handleFiles(e.target.files);
-            });
-            function preventDefaults(e) { e.preventDefault(); e.stopPropagation(); }
-            async function handleFiles(files) {
-                const formData = new FormData();
-                let fileNames = [];
-                for (const file of files) {
-                    formData.append('file', file);
-                    fileNames.push(file.name);
-                }
-                fileNameSpan.textContent = `Selected: ${fileNames.join(', ')}`;
-                await uploadAndProcessFiles(formData, fileNames);
-            }
-            async function uploadAndProcessFiles(formData, fileNames) {
-                loadingOverlay.classList.remove('hidden');
-                loadingText.textContent = `Processing ${fileNames.length} document(s)...`;
-                loadingSubtext.textContent = "🤓Creating a knowledge base may take a minute or two. So please hold on tight";
-                try {
-                    const response = await fetch('/upload', { method: 'POST', body: formData });
-                    const result = await response.json();
-                    if (!response.ok) throw new Error(result.message || 'Unknown error occurred.');
-                    if (result.session_id) {
-                        sessionId = result.session_id;
-                        sessionStorage.setItem('cognichat_session_id', sessionId);
-                        console.debug('Stored session ID from upload:', sessionId);
-                    } else {
-                        console.warn('Upload response missing session_id field.');
-                    }
-                    chatFilename.textContent = `Chatting with: ${result.filename}`;
-                    uploadContainer.classList.add('hidden');
-                    chatContainer.classList.remove('hidden');
-                    appendMessage("I've analyzed your documents. What would you like to know?", "bot");
-                } catch (error) {
-                    console.error('Upload error:', error);
-                    alert(`Error: ${error.message}`);
-                } finally {
-                    loadingOverlay.classList.add('hidden');
-                    loadingSubtext.textContent = '';
-                    fileNameSpan.textContent = '';
-                    fileUploadInput.value = '';
-                }
-            }
-            // --- Chat Logic ---
-            chatForm.addEventListener('submit', async (e) => {
-                e.preventDefault();
-                const question = chatInput.value.trim();
-                if (!question) return;
-                appendMessage(question, 'user');
-                chatInput.value = '';
-                chatInput.disabled = true;
-                chatSubmitBtn.disabled = true;
-                const typingIndicator = showTypingIndicator();
-                let botMessageContainer = null;
-                let contentDiv = null;
-                try {
-                    const requestBody = { question: question };
-                    if (sessionId) {
-                        requestBody.session_id = sessionId;
-                    }
-                    const response = await fetch('/chat', {
-                        method: 'POST',
-                        headers: { 'Content-Type': 'application/json' },
-                        body: JSON.stringify(requestBody),
-                    });
-                    if (!response.ok) throw new Error(`Server error: ${response.statusText}`);
-                    // ============================ MODIFICATION START ==============================
-                    // Parse the JSON response instead of reading a stream
-                    const result = await response.json();
-                    const answer = result.answer; // Extract the 'answer' field
-                    if (!answer) {
-                        throw new Error("Received an empty or invalid response from the server.");
-                    }
-                    typingIndicator.remove();
-                    botMessageContainer = appendMessage('', 'bot');
-                    contentDiv = botMessageContainer.querySelector('.markdown-content');
-                    // Use the extracted answer for rendering
-                    contentDiv.innerHTML = marked.parse(answer);
-                    contentDiv.querySelectorAll('pre').forEach(addCopyButton);
-                    scrollToBottom(); // Scroll after content is added
-                    // Use the extracted answer for TTS
-                    addTextToSpeechControls(botMessageContainer, answer);
-                    // ============================ MODIFICATION END ==============================
-                } catch (error) {
-                    console.error('Chat error:', error);
-                    if (typingIndicator) typingIndicator.remove();
-                    if (contentDiv) {
-                        contentDiv.innerHTML = `<p class="text-red-500">Error: ${error.message}</p>`;
-                    } else {
-                        appendMessage(`Error: ${error.message}`, 'bot');
-                    }
-                } finally {
-                    chatInput.disabled = false;
-                    chatSubmitBtn.disabled = false;
-                    chatInput.focus();
-                }
-            });
-            // --- UI Helper Functions ---
-            function appendMessage(text, sender) {
-                const messageWrapper = document.createElement('div');
-                messageWrapper.className = `flex items-start gap-4`;
-                const iconSVG = sender === 'user'
-                    ? `<div class="bg-blue-100 dark:bg-gray-700 p-2.5 rounded-full flex-shrink-0 mt-1"><svg class="w-5 h-5 text-blue-600 dark:text-blue-300" viewBox="0 0 24 24"><path fill="currentColor" d="M12 12c2.21 0 4-1.79 4-4s-1.79-4-4-4-4 1.79-4 4 1.79 4 4 4zm0 2c-2.67 0-8 1.34-8 4v2h16v-2c0-2.66-5.33-4-8-4z"></path></svg></div>`
-                    : `<div class="bg-gray-200 dark:bg-gray-700 rounded-full flex-shrink-0 mt-1 text-xl flex items-center justify-center w-10 h-10">✨</div>`;
-                const messageBubble = document.createElement('div');
-                messageBubble.className = `flex-1 pt-1`;
-                const senderName = document.createElement('p');
-                senderName.className = 'font-medium text-sm mb-1';
-                senderName.textContent = sender === 'user' ? 'You' : 'CogniChat';
-                const contentDiv = document.createElement('div');
-                contentDiv.className = 'text-base markdown-content';
-                // Only parse if text is not empty
-                if (text) {
-                    contentDiv.innerHTML = marked.parse(text);
-                }
-                const controlsContainer = document.createElement('div');
-                controlsContainer.className = 'tts-controls mt-2';
-                messageBubble.appendChild(senderName);
-                messageBubble.appendChild(contentDiv);
-                messageBubble.appendChild(controlsContainer);
-                messageWrapper.innerHTML = iconSVG;
-                messageWrapper.appendChild(messageBubble);
-                chatContent.appendChild(messageWrapper);
-                scrollToBottom();
-                return messageBubble;
-            }
-            function showTypingIndicator() {
-                const indicatorWrapper = document.createElement('div');
-                indicatorWrapper.className = `flex items-start gap-4`;
-                indicatorWrapper.id = 'typing-indicator';
-                const iconSVG = `<div class="bg-gray-200 dark:bg-gray-700 rounded-full flex-shrink-0 mt-1 text-xl flex items-center justify-center w-10 h-10">✨</div>`;
-                const messageBubble = document.createElement('div');
-                messageBubble.className = 'flex-1 pt-1';
-                const senderName = document.createElement('p');
-                senderName.className = 'font-medium text-sm mb-1';
-                senderName.textContent = 'CogniChat is thinking...';
-                const indicator = document.createElement('div');
-                indicator.className = 'typing-indicator';
-                indicator.innerHTML = '<span></span><span></span><span></span>';
-                messageBubble.appendChild(senderName);
-                messageBubble.appendChild(indicator);
-                indicatorWrapper.innerHTML = iconSVG;
-                indicatorWrapper.appendChild(messageBubble);
-                chatContent.appendChild(indicatorWrapper);
-                scrollToBottom();
-                return indicatorWrapper;
-            }
-            function scrollToBottom() {
-                chatWindow.scrollTo({
-                    top: chatWindow.scrollHeight,
-                    behavior: 'smooth'
-                });
-            }
-            function addCopyButton(pre) {
-                const button = document.createElement('button');
-                button.className = 'copy-code-btn';
-                button.textContent = 'Copy';
-                pre.appendChild(button);
-                button.addEventListener('click', () => {
-                    const code = pre.querySelector('code').innerText;
-                    navigator.clipboard.writeText(code).then(() => {
-                        button.textContent = 'Copied!';
-                        setTimeout(() => button.textContent = 'Copy', 2000);
-                    });
-                });
-            }
-            // --- Text-to-Speech Logic ---
-            let currentAudio = null;
-            let currentPlayingButton = null;
-            const playIconSVG = `<svg class="w-5 h-5" fill="currentColor" viewBox="0 0 24 24"><path d="M8 5v14l11-7z"/></svg>`;
-            const pauseIconSVG = `<svg class="w-5 h-5" fill="currentColor" viewBox="0 0 24 24"><path d="M6 19h4V5H6v14zm8-14v14h4V5h-4z"/></svg>`;
-            function addTextToSpeechControls(messageBubble, text) {
-                const ttsControls = messageBubble.querySelector('.tts-controls');
-                if (text.trim().length > 0) {
-                    const speakButton = document.createElement('button');
-                    speakButton.className = 'speak-btn px-4 py-2 bg-blue-700 text-white rounded-full text-sm font-medium hover:bg-blue-800 transition-colors flex items-center gap-2 disabled:opacity-50 disabled:cursor-not-allowed';
-                    speakButton.title = 'Listen to this message';
-                    speakButton.setAttribute('data-state', 'play');
-                    speakButton.innerHTML = `${playIconSVG} <span>Play</span>`;
-                    ttsControls.appendChild(speakButton);
-                    speakButton.addEventListener('click', () => handleTTS(text, speakButton));
-                }
-            }
-            async function handleTTS(text, button) {
-                if (button === currentPlayingButton) {
-                    if (currentAudio && !currentAudio.paused) {
-                        currentAudio.pause();
-                        button.setAttribute('data-state', 'paused');
-                        button.innerHTML = `${playIconSVG} <span>Play</span>`;
-                    } else if (currentAudio && currentAudio.paused) {
-                        currentAudio.play();
-                        button.setAttribute('data-state', 'playing');
-                        button.innerHTML = `${pauseIconSVG} <span>Pause</span>`;
-                    }
-                    return;
-                }
-                resetAllSpeakButtons();
-                currentPlayingButton = button;
-                button.setAttribute('data-state', 'loading');
-                button.innerHTML = `<div class="tts-button-loader"></div> <span>Loading...</span>`;
-                button.disabled = true;
-                try {
-                    const response = await fetch('/tts', {
-                        method: 'POST',
-                        headers: { 'Content-Type': 'application/json' },
-                        body: JSON.stringify({ text: text })
-                    });
-                    if (!response.ok) throw new Error('Failed to generate audio.');
-                    const blob = await response.blob();
-                    const audioUrl = URL.createObjectURL(blob);
-                    currentAudio = new Audio(audioUrl);
-                    currentAudio.play();
-                    button.setAttribute('data-state', 'playing');
-                    button.innerHTML = `${pauseIconSVG} <span>Pause</span>`;
-                    currentAudio.onended = () => {
-                        button.setAttribute('data-state', 'play');
-                        button.innerHTML = `${playIconSVG} <span>Play</span>`;
-                        currentAudio = null;
-                        currentPlayingButton = null;
-                    };
-                } catch (error) {
-                    console.error('TTS Error:', error);
-                    button.setAttribute('data-state', 'error');
-                    button.innerHTML = `${playIconSVG} <span>Error</span>`;
-                    alert('Failed to play audio. Please try again.');
-                    resetAllSpeakButtons();
-                } finally {
-                    button.disabled = false;
-                }
-            }
-            function resetAllSpeakButtons() {
-                document.querySelectorAll('.speak-btn').forEach(btn => {
-                    btn.setAttribute('data-state', 'play');
-                    btn.innerHTML = `${playIconSVG} <span>Play</span>`;
                     btn.disabled = false;
-                });
-                if (currentAudio) {
-                    currentAudio.pause();
-                    currentAudio = null;
-                }
-                currentPlayingButton = null;
-            }
-        });
-    </script>
-</body>
-</html>

 <!DOCTYPE html>
+ <html lang="en">
+ <head>
+     <meta charset="UTF-8">
+     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+     <title>CogniChat - Chat with your Documents</title>
+     <script src="https://cdn.tailwindcss.com"></script>
+     <link rel="preconnect" href="https://fonts.googleapis.com">
+     <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
+     <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=Google+Sans:wght@400;500;700&family=Roboto:wght@400;500&display=swap" rel="stylesheet">
+     <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
+     <style>
+         /* Light-theme palette. The .dark block overrides these values when the
+            "dark" class is present on an ancestor (currently set on <body>). */
+         :root {
+             --background: #f0f4f9;
+             --foreground: #1f1f1f;
+             --primary: #1a73e8;
+             --primary-hover: #1867cf;
+             --card: #ffffff;
+             --card-border: #dadce0;
+             --input-bg: #e8f0fe;
+             --user-bubble: #d9e7ff;
+             --bot-bubble: #f1f3f4;
+             --select-bg: #ffffff;
+             --select-border: #dadce0;
+             --select-text: #1f1f1f;
+             /* Light defaults for the code-block and copy-button variables.
+                They are consumed unconditionally by .markdown-content pre and
+                .copy-code-btn, so they must resolve without .dark as well. */
+             --code-bg: #f6f8fa;
+             --code-text: #24292f;
+             --copy-btn-bg: #e5e7eb;
+             --copy-btn-hover-bg: #d1d5db;
+             --copy-btn-text: #1f2937;
+         }
+         .dark {
+             --background: #111827; /* Darker background */
+             --foreground: #e5e7eb;
+             --primary: #3b82f6; /* Adjusted primary blue */
+             --primary-hover: #60a5fa; /* Lighter hover blue */
+             --card: #1f2937;    /* Dark card background */
+             --card-border: #4b5563; /* Greyer border */
+             --input-bg: #374151;    /* Darker input background */
+             --user-bubble: #374151; /* Darker user bubble */
+             --bot-bubble: #374151;  /* Darker bot bubble */
+             --select-bg: #374151;
+             --select-border: #6b7280;
+             --select-text: #f3f4f6;
+             --code-bg: #2d2d2d;     /* Specific background for code blocks */
+             --code-text: #d4d4d4;   /* Light grey text for code */
+             --copy-btn-bg: #4a4a4a;
+             --copy-btn-hover-bg: #5a5a5a;
+             --copy-btn-text: #e0e0e0;
+         }
+         body {
+             font-family: 'Inter', 'Google Sans', 'Roboto', sans-serif;
+             background-color: var(--background);
+             color: var(--foreground);
+             overflow: hidden; /* Prevent body scroll */
+         }
+         #chat-window {
+             scroll-behavior: smooth; /* Ensure smooth programmatic scroll */
+         }
+         #chat-window::-webkit-scrollbar { width: 8px; }
+         #chat-window::-webkit-scrollbar-track { background: transparent; }
+         #chat-window::-webkit-scrollbar-thumb { background-color: #4b5563; border-radius: 20px; }
+         .dark #chat-window::-webkit-scrollbar-thumb { background-color: #5f6368; }
+         .drop-zone--over {
+             border-color: var(--primary);
+             box-shadow: 0 0 20px rgba(59, 130, 246, 0.4);
+         }
+         .loader {
+             width: 48px;
+             height: 48px;
+             border: 3px solid var(--card-border);
+             border-radius: 50%;
+             display: inline-block;
+             position: relative;
+             box-sizing: border-box;
+             animation: rotation 1s linear infinite;
+         }
+         .loader::after {
+             content: '';
+             box-sizing: border-box;
+             position: absolute;
+             left: 50%;
+             top: 50%;
+             transform: translate(-50%, -50%);
+             width: 56px;
+             height: 56px;
+             border-radius: 50%;
+             border: 3px solid;
+             border-color: var(--primary) transparent;
+         }
+         @keyframes rotation {
+             0% { transform: rotate(0deg); }
+             100% { transform: rotate(360deg); }
+         }
+         /* --- Updated Typing Indicator --- */
+         .typing-indicator {
+             display: inline-flex; /* Changed to inline-flex */
+             align-items: center;
+             padding: 8px 0; /* Add some vertical padding */
+         }
+         .typing-indicator span {
+             height: 8px; /* Slightly smaller dots */
+             width: 8px;
+             margin: 0 2px;
+             background-color: #9E9E9E;
+             border-radius: 50%;
+             opacity: 0; /* Start invisible */
+             animation: typing-pulse 1.4s infinite ease-in-out;
+         }
+         .typing-indicator span:nth-child(1) { animation-delay: 0s; }
+         .typing-indicator span:nth-child(2) { animation-delay: 0.2s; }
+         .typing-indicator span:nth-child(3) { animation-delay: 0.4s; }
+         @keyframes typing-pulse {
+             0%, 100% { opacity: 0; transform: scale(0.7); }
+             50% { opacity: 1; transform: scale(1); }
+         }
+        /* --- End Typing Indicator --- */
+        /* --- Updated Markdown Styling --- */
+         .markdown-content { /* Base styles for the content area */
+             line-height: 1.75;
+         }
+         .markdown-content p { margin-bottom: 1rem; }
+         .markdown-content h1, .markdown-content h2, .markdown-content h3,
+         .markdown-content h4, .markdown-content h5, .markdown-content h6 {
+             font-weight: 600;
+             margin-top: 1.5rem;
+             margin-bottom: 0.75rem;
+             line-height: 1.3;
+         }
+         .markdown-content h1 { font-size: 1.5em; border-bottom: 1px solid var(--card-border); padding-bottom: 0.3rem;}
+         .markdown-content h2 { font-size: 1.25em; }
+         .markdown-content h3 { font-size: 1.1em; }
+         .markdown-content ul, .markdown-content ol { padding-left: 1.75rem; margin-bottom: 1rem; }
+         .markdown-content li { margin-bottom: 0.5rem; }
+         .markdown-content a { color: var(--primary); text-decoration: none; font-weight: 500; }
+         .markdown-content a:hover { text-decoration: underline; }
+         .markdown-content strong, .markdown-content b { font-weight: 600; } /* Ensure bold works */
+         .markdown-content blockquote {
+             border-left: 4px solid var(--card-border);
+             padding-left: 1rem;
+             margin-left: 0;
+             margin-bottom: 1rem;
+             color: #a0aec0; /* Lighter text for quotes */
+         }
+        /* --- Code Block Styling --- */
+         .markdown-content pre {
+             position: relative;
+             background-color: var(--code-bg);
+             border: 1px solid var(--card-border);
+             border-radius: 0.5rem;
+             margin-bottom: 1rem;
+             font-size: 0.9em;
+             color: var(--code-text);
+             overflow: hidden; /* Clip content to the rounded border; the inner <code> scrolls horizontally */
+         }
+         .markdown-content pre code {
+             display: block;
+             padding: 1rem;
+             overflow-x: auto; /* Enable horizontal scroll on the code itself */
+             background: none !important; /* Override potential highlight.js background */
+             font-family: Consolas, Monaco, 'Andale Mono', 'Ubuntu Mono', monospace;
+             white-space: pre; /* Ensure whitespace is preserved */
+         }
+          /* --- Copy Button Styling --- */
+         .markdown-content pre .copy-code-btn {
             position: absolute;
+            top: 0.5rem;
+            right: 0.5rem;
+            background-color: var(--copy-btn-bg);
             border: 1px solid var(--card-border);
+            color: var(--copy-btn-text);
+            padding: 0.3rem 0.6rem;
+            border-radius: 0.25rem;
+            cursor: pointer;
+            opacity: 0; /* Initially hidden */
+            transition: opacity 0.2s, background-color 0.2s;
             font-size: 0.8em;
+            display: flex; /* For icon alignment */
+            align-items: center;
+            gap: 0.25rem;
+         }
+         .markdown-content pre .copy-code-btn:hover {
+             background-color: var(--copy-btn-hover-bg);
+         }
+         .markdown-content pre:hover .copy-code-btn {
+             opacity: 1; /* Show on hover */
+         }
+        /* --- Inline Code Styling --- */
+         .markdown-content code:not(pre code) {
+             background-color: rgba(110, 118, 129, 0.4);
+             padding: 0.2em 0.4em;
+             margin: 0 0.1em; /* Add slight horizontal margin */
+             font-size: 85%;
+             border-radius: 6px;
+             font-family: Consolas, Monaco, 'Andale Mono', 'Ubuntu Mono', monospace;
+         }
+        /* --- End Markdown Styling --- */
+         .tts-button-loader {
+             width: 16px;
+             height: 16px;
+             border: 2px solid currentColor;
+             border-radius: 50%;
+             display: inline-block;
+             box-sizing: border-box;
+             animation: rotation 0.8s linear infinite;
+             border-bottom-color: transparent;
+         }
+         /* --- Style for TTS controls container --- */
+         .tts-controls {
+             display: flex;
+             align-items: center;
+             gap: 0.5rem; /* Space between play and speed buttons */
+             margin-top: 0.5rem;
+         }
+         /* --- Style for Speed Cycle Button --- */
+         .speed-cycle-btn {
+             padding: 0.25rem 0.6rem; /* Smaller padding */
+             font-size: 0.75rem; /* Smaller text */
+             background-color: #4b5563; /* Grey background */
+             color: #e5e7eb; /* Light text */
+             border-radius: 9999px; /* Pill shape */
+             border: none;
+             cursor: pointer;
+             transition: background-color 0.2s;
+             white-space: nowrap; /* Prevent text wrapping */
+             margin-top: 0.5rem;
+         }
+         .speed-cycle-btn:hover {
+             background-color: #1f0bb8e6; /* Translucent indigo (~90% opacity) on hover */
+         }
+         .speed-cycle-btn:disabled {
+              opacity: 0.5;
+              cursor: not-allowed;
+         }
+         /* --- Select dropdown styles (kept for consistency if needed elsewhere) --- */
+         .select-wrapper {
+             position: relative;
+         }
+         .select-wrapper select {
+             background-color: var(--select-bg);
+             border: 1px solid var(--select-border);
+             color: var(--select-text);
+             padding: 0.75rem 2.5rem 0.75rem 1rem;
+             border-radius: 0.75rem;
+             font-size: 0.875rem;
+             width: 100%;
+             appearance: none;
+             -webkit-appearance: none;
+             transition: all 0.2s ease-in-out;
+             cursor: pointer;
+             background-image: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' fill='none' viewBox='0 0 20 20'%3e%3cpath stroke='%239ca3af' stroke-linecap='round' stroke-linejoin='round' stroke-width='1.5' d='M6 8l4 4 4-4'/%3e%3c/svg%3e");
+             background-position: right 0.75rem center;
+             background-repeat: no-repeat;
+             background-size: 1.25em 1.25em;
+         }
+     </style>
+ </head>
+ <body class="w-screen h-screen dark">
+     <main id="main-content" class="h-full flex flex-col transition-opacity duration-500">
+         <div id="chat-container" class="hidden flex-1 flex flex-col w-full mx-auto overflow-hidden">
+             <header class="p-4 border-b border-[var(--card-border)] flex-shrink-0 flex justify-between items-center w-full">
+                 <div class="w-1/4"></div> <div class="w-1/2 text-center">
+                     <h1 class="text-xl font-medium tracking-wide">CogniChat ✨</h1>
+                     <p id="chat-filename" class="text-xs text-gray-400 mt-1 truncate"></p>
+                 </div>
+                 <div id="chat-session-info" class="w-1/4 text-right text-xs space-y-1 pr-4">
+                     </div>
+             </header>
+             <div id="chat-window" class="flex-1 overflow-y-auto p-4 md:p-6 lg:p-10">
+                 <div id="chat-content" class="max-w-4xl mx-auto space-y-8"></div>
+             </div>
+             <div class="p-4 flex-shrink-0 bg-opacity-50 backdrop-blur-md border-t border-[var(--card-border)]">
+                 <form id="chat-form" class="max-w-4xl mx-auto bg-[var(--card)] rounded-full p-2 flex items-center shadow-lg border border-[var(--card-border)] focus-within:ring-2 focus-within:ring-[var(--primary)] transition-all">
+                     <input type="text" id="chat-input" placeholder="Ask a question about your documents..." class="flex-grow bg-transparent focus:outline-none px-4 text-sm" autocomplete="off">
+                     <button type="submit" id="chat-submit-btn" class="bg-[var(--primary)] hover:bg-[var(--primary-hover)] text-white p-2.5 rounded-full transition-all duration-200 disabled:opacity-50 disabled:cursor-not-allowed" title="Send">
+                         <svg class="w-5 h-5" viewBox="0 0 20 20" fill="currentColor"><path fill-rule="evenodd" d="M10 18a8 8 0 100-16 8 8 0 000 16zm3.707-8.707l-3-3a1 1 0 00-1.414 1.414L10.586 9H7a1 1 0 100 2h3.586l-1.293 1.293a1 1 0 101.414 1.414l3-3a1 1 0 000-1.414z" clip-rule="evenodd"></path></svg>
+                     </button>
+                 </form>
+             </div>
+         </div>
+         <div id="upload-container" class="flex-1 flex flex-col items-center justify-center p-8 transition-opacity duration-300">
+             <div class="text-center max-w-xl w-full">
+                 <h1 class="text-5xl font-bold mb-3 tracking-tight">CogniChat ✨</h1>
+                 <p class="text-lg text-gray-400 mb-8">Upload your documents to start a conversation.</p>
+                 <div class="mb-8 p-5 bg-[var(--card)] rounded-2xl border border-[var(--card-border)] shadow-lg">
+                     <div class="flex flex-col sm:flex-row items-center gap-6">
+                         <div class="w-full sm:w-1/2">
+                             <div class="flex items-center gap-2 mb-2">
+                                 <svg class="w-5 h-5 text-gray-400" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20" fill="currentColor"><path d="M7 3a1 1 0 000 2h6a1 1 0 100-2H7zM4 7a1 1 0 011-1h10a1 1 0 110 2H5a1 1 0 01-1-1zM2 11a2 2 0 012-2h12a2 2 0 012 2v4a2 2 0 01-2 2H4a2 2 0 01-2-2v-4z" /></svg>
+                                 <label for="model-select" class="block text-sm font-medium text-gray-300">Model</label>
+                             </div>
+                             <div class="select-wrapper">
+                                 <select id="model-select" name="model_name">
+                                     <option value="moonshotai/kimi-k2-instruct" selected>Kimi Instruct</option>
+                                     <option value="openai/gpt-oss-20b">GPT OSS 20b</option>
+                                     <option value="llama-3.3-70b-versatile">Llama 3.3 70b</option>
+                                     <option value="llama-3.1-8b-instant">Llama 3.1 8b Instant</option>
+                                 </select>
+                             </div>
+                         </div>
+                         <div class="w-full sm:w-1/2">
+                              <div class="flex items-center gap-2 mb-2">
+                                 <svg class="w-5 h-5 text-gray-400" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20" fill="currentColor"><path fill-rule="evenodd" d="M5.5 16a3.5 3.5 0 100-7 3.5 3.5 0 000 7zM12 5.5a3.5 3.5 0 11-7 0 3.5 3.5 0 017 0zM14.5 16a3.5 3.5 0 100-7 3.5 3.5 0 000 7z" clip-rule="evenodd" /></svg>
+                                 <label for="temperature-select" class="block text-sm font-medium text-gray-300">Mode</label>
+                             </div>
+                              <div class="select-wrapper">
+                                 <select id="temperature-select" name="temperature">
+                                     <option value="0.2" selected>0.2 - Precise</option>
+                                     <option value="0.4">0.4 - Confident</option>
+                                     <option value="0.6">0.6 - Balanced</option>
+                                     <option value="0.8">0.8 - Flexible</option>
+                                     <option value="1.0">1.0 - Creative</option>
+                                 </select>
+                             </div>
+                         </div>
+                         </div>
+                     <p class="text-xs text-gray-500 mt-4 text-center">Higher creativity modes may reduce factual accuracy.</p>
+                 </div>
+                 <div id="drop-zone" class="w-full text-center border-2 border-dashed border-[var(--card-border)] rounded-2xl p-10 transition-all duration-300 cursor-pointer hover:bg-[var(--card)] hover:border-[var(--primary)]">
+                     <div class="flex flex-col items-center justify-center pointer-events-none">
+                         <svg class="mx-auto h-12 w-12 text-gray-500" fill="none" viewBox="0 0 24 24" stroke="currentColor"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5" d="M12 16.5V9.75m0 0l3-3m-3 3l-3 3M6.75 19.5a4.5 4.5 0 01-1.41-8.775 5.25 5.25 0 0110.233-2.33 3 3 0 013.758 3.848A3.752 3.752 0 0118 19.5H6.75z"></path></svg>
+                         <p class="mt-4 text-sm font-medium text-gray-400">Drag & drop files or <span class="text-[var(--primary)] font-semibold">click to upload</span></p>
+                         <p class="text-xs text-gray-400 mt-1">Supports PDF, DOCX, TXT</p>
+                         <p id="file-name" class="mt-2 text-xs text-gray-500"></p>
+                     </div>
+                     <input id="file-upload" type="file" class="hidden" accept=".pdf,.txt,.docx" multiple>
+                 </div>
+             </div>
+         </div>
+         <div id="loading-overlay" class="hidden fixed inset-0 bg-[var(--background)] bg-opacity-80 backdrop-blur-sm flex flex-col items-center justify-center z-50">
+             <div class="loader"></div>
+             <p id="loading-text" class="mt-6 text-sm font-medium"></p>
+             <p id="loading-subtext" class="mt-2 text-xs text-gray-400"></p>
+         </div>
+     </main>
+     <script>
+         document.addEventListener('DOMContentLoaded', () => {
+             const uploadContainer = document.getElementById('upload-container');
+             const chatContainer = document.getElementById('chat-container');
+             const dropZone = document.getElementById('drop-zone');
+             const fileUploadInput = document.getElementById('file-upload');
+             const fileNameSpan = document.getElementById('file-name');
+             const loadingOverlay = document.getElementById('loading-overlay');
+             const loadingText = document.getElementById('loading-text');
+             const loadingSubtext = document.getElementById('loading-subtext');
+             const chatForm = document.getElementById('chat-form');
+             const chatInput = document.getElementById('chat-input');
+             const chatSubmitBtn = document.getElementById('chat-submit-btn');
+             const chatWindow = document.getElementById('chat-window');
+             const chatContent = document.getElementById('chat-content');
+             const modelSelect = document.getElementById('model-select');
+             const temperatureSelect = document.getElementById('temperature-select');
+             const chatFilename = document.getElementById('chat-filename');
+             const chatSessionInfo = document.getElementById('chat-session-info');
+             let sessionId = sessionStorage.getItem('cognichat_session_id');
+             let currentModelInfo = JSON.parse(sessionStorage.getItem('cognichat_model_info'));
+             marked.setOptions({
+                 breaks: true,
+                 gfm: true,
+             });
+             if (sessionId && currentModelInfo) {
+                 console.log("Restoring session:", sessionId);
+                 uploadContainer.classList.add('hidden');
+                 chatContainer.classList.remove('hidden');
+                 chatFilename.innerHTML = `Chatting with: <strong class="font-semibold">${sessionStorage.getItem('cognichat_filename') || 'documents'}</strong>`;
+                 chatFilename.title = sessionStorage.getItem('cognichat_filename') || 'documents';
+                 chatSessionInfo.innerHTML = `
+                     <p>Model: ${currentModelInfo.simpleModelName}</p>
+                     <p>Mode: ${currentModelInfo.mode}</p>
+                     <button class="mt-1 text-xs text-blue-400 hover:text-blue-300 focus:outline-none" onclick="sessionStorage.clear(); location.reload();">New Chat</button>`;
+             }
+             // --- File Upload Logic ---
+             dropZone.addEventListener('click', () => fileUploadInput.click());
+             ['dragenter', 'dragover', 'dragleave', 'drop'].forEach(eventName => {
+                 dropZone.addEventListener(eventName, e => {e.preventDefault(); e.stopPropagation();}, false);
+                 document.body.addEventListener(eventName, e => {e.preventDefault(); e.stopPropagation();}, false);
+             });
+             ['dragenter', 'dragover'].forEach(eventName => dropZone.addEventListener(eventName, () => dropZone.classList.add('drop-zone--over')));
+             ['dragleave', 'drop'].forEach(eventName => dropZone.addEventListener(eventName, () => dropZone.classList.remove('drop-zone--over')));
+             dropZone.addEventListener('drop', (e) => {
+                 if (e.dataTransfer.files.length > 0) handleFiles(e.dataTransfer.files);
+             });
+             fileUploadInput.addEventListener('change', (e) => {
+                 if (e.target.files.length > 0) handleFiles(e.target.files);
+             });
+             async function handleFiles(files) {
+                 const formData = new FormData();
+                 let fileNames = Array.from(files).map(f => f.name);
+                 for (const file of files) { formData.append('file', file); }
+                 formData.append('model_name', modelSelect.value);
+                 formData.append('temperature', temperatureSelect.value);
+                 fileNameSpan.textContent = `Selected: ${fileNames.join(', ')}`;
+                 await uploadAndProcessFiles(formData);
+             }
+             async function uploadAndProcessFiles(formData) {
+                 loadingOverlay.classList.remove('hidden');
+                 loadingText.textContent = `Processing document(s)...`;
+                 loadingSubtext.textContent = "Creating a knowledge base... this might take a minute 🧠";
+                 chatContent.innerHTML = ''; // Clear previous chat content on new upload
+                 try {
+                     const response = await fetch('/upload', { method: 'POST', body: formData });
+                     const result = await response.json();
+                     if (!response.ok) throw new Error(result.message || 'Unknown error occurred during upload.');
+                     sessionId = result.session_id;
+                     sessionStorage.setItem('cognichat_session_id', sessionId);
+                     const modelOption = modelSelect.querySelector(`option[value="${result.model_name}"]`);
+                     const simpleModelName = modelOption ? modelOption.textContent : result.model_name; // Adjust if needed
+                     currentModelInfo = {
+                         model: result.model_name,
+                         mode: result.mode,
+                         simpleModelName: simpleModelName // Use the derived simpler name
+                     };
+                     sessionStorage.setItem('cognichat_model_info', JSON.stringify(currentModelInfo)); // Store model info
+                     sessionStorage.setItem('cognichat_filename', result.filename); // Store filename
+                     chatFilename.innerHTML = `Chatting with: <strong class="font-semibold">${result.filename}</strong>`;
+                     chatFilename.title = result.filename;
+                     chatSessionInfo.innerHTML = `
+                         <p>Model: ${currentModelInfo.simpleModelName}</p>
+                         <p>Mode: ${currentModelInfo.mode}</p>
+                         <button class="mt-1 text-xs text-blue-400 hover:text-blue-300 focus:outline-none" onclick="sessionStorage.clear(); location.reload();">New Chat</button>`;
+                     uploadContainer.classList.add('hidden');
+                     chatContainer.classList.remove('hidden');
+                     appendMessage("Hello! 👋 I've analyzed your documents. What would you like to know?", "bot", currentModelInfo);
+                 } catch (error) {
+                     console.error('Upload error:', error);
+                     alert(`Error processing files: ${error.message}`);
+                     sessionStorage.clear(); // Clear session if upload fails
+                 } finally {
+                     loadingOverlay.classList.add('hidden');
+                     fileNameSpan.textContent = '';
+                     fileUploadInput.value = '';
+                 }
+             }
+             // --- Chat Logic (Using Server-Sent Events - UPDATED FOR STREAMING & INDICATOR) ---
+             chatForm.addEventListener('submit', async (e) => {
+                 e.preventDefault();
+                 const question = chatInput.value.trim();
+                 if (!question || !sessionId) {
+                     console.warn("Submit ignored: No question or session ID.");
+                     return;
+                 }
+                 appendMessage(question, 'user');
+                 chatInput.value = '';
+                 chatInput.disabled = true;
+                 chatSubmitBtn.disabled = true;
+                 let botMessageContainer;
+                 let contentDiv;
+                 let fullResponse = '';
+                 let eventSource = null;
+                 let inactivityTimeout = null;
+                 let streamClosedCleanly = false; // Flag to check if stream ended normally vs error
+                 let typingIndicatorElement = null; // Store indicator element
+                 // Function to finalize chat (called on error, timeout, or successful completion)
+                 function finalizeChat(isError = false) {
+                     console.log(`Finalizing chat. Was error: ${isError}, Stream ended cleanly: ${streamClosedCleanly}`);
+                     if (eventSource) {
+                         eventSource.close();
+                         eventSource = null;
+                         console.log("SSE connection explicitly closed in finalizeChat.");
+                     }
+                     if (inactivityTimeout) {
+                         clearTimeout(inactivityTimeout);
+                         inactivityTimeout = null;
+                     }
+                     // Remove indicator if it's still there
+                     if (typingIndicatorElement && typingIndicatorElement.parentNode) {
+                         typingIndicatorElement.parentNode.removeChild(typingIndicatorElement);
+                         typingIndicatorElement = null;
+                     }
+                     if (botMessageContainer && contentDiv) {
+                        const hasErrorMsg = contentDiv.innerHTML.includes('⚠️');
+                        // Ensure final render, apply copy buttons and TTS ONLY if response wasn't an error
+                        if (!hasErrorMsg && fullResponse) {
+                            // Re-parse the complete response to ensure correct final Markdown
+                            contentDiv.innerHTML = marked.parse(fullResponse);
+                            // Apply final touches like copy buttons and TTS
+                            contentDiv.querySelectorAll('pre').forEach(addCopyButton);
+                            addTextToSpeechControls(botMessageContainer, fullResponse);
+                            // Optional: Final highlighting if using highlight.js
+                            // contentDiv.querySelectorAll('pre code').forEach(block => hljs.highlightElement(block));
+                        }
+                         scrollToBottom(true); // Ensure scrolled to the end
+                     }
+                     // Always re-enable input fields
+                     chatInput.disabled = false;
+                     chatSubmitBtn.disabled = false;
+                     chatInput.focus();
+                 }
+                 try {
+                     // Create the bot message container *before* starting the stream
+                     botMessageContainer = appendMessage('', 'bot', currentModelInfo); // Append empty bot message
+                     contentDiv = botMessageContainer.querySelector('.markdown-content');
+                     // Show typing indicator *inside* the contentDiv
+                     typingIndicatorElement = showTypingIndicator();
+                     if (contentDiv) {
+                         contentDiv.appendChild(typingIndicatorElement);
+                         scrollToBottom(true); // Scroll to show indicator
+                     } else {
+                         console.error("Could not find contentDiv to append typing indicator.");
+                     }
+                     // Establish SSE connection via GET request
+                     const chatUrl = `/chat?question=${encodeURIComponent(question)}&session_id=${encodeURIComponent(sessionId)}`;
+                     console.log("Connecting to SSE:", chatUrl);
+                     eventSource = new EventSource(chatUrl);
+                     eventSource.onopen = () => {
+                         console.log("SSE Connection opened.");
+                         // Remove indicator when connection opens and stream is about to start
+                         if (typingIndicatorElement && typingIndicatorElement.parentNode) {
+                            typingIndicatorElement.parentNode.removeChild(typingIndicatorElement);
+                            typingIndicatorElement = null;
+                         }
+                         streamClosedCleanly = false; // Reset flag on new connection
+                     };
+                     eventSource.onmessage = (event) => {
+                         // Remove indicator on first message just in case onopen didn't fire reliably
+                          if (typingIndicatorElement && typingIndicatorElement.parentNode) {
+                            typingIndicatorElement.parentNode.removeChild(typingIndicatorElement);
+                            typingIndicatorElement = null;
+                         }
+                         // Reset inactivity timeout on each message
+                         if (inactivityTimeout) clearTimeout(inactivityTimeout);
+                         inactivityTimeout = setTimeout(() => {
+                             console.log("Inactivity timeout triggered after message.");
+                             streamClosedCleanly = true; // Assume normal end
+                             finalizeChat(false);
+                         }, 5000); // 5 seconds of inactivity
+                         let data;
+                         try {
+                              data = JSON.parse(event.data);
+                         } catch (parseError){
+                             console.error("Failed to parse SSE data:", event.data, parseError);
+                             contentDiv.innerHTML += `<p class="text-red-400 text-sm">Error receiving data chunk.</p>`;
+                             return;
+                         }
+                         if (data.error) {
+                             console.error('SSE Error from server:', data.error);
+                             contentDiv.innerHTML = `<p class="text-red-500 font-semibold">⚠️ Server Error: ${data.error}</p>`;
+                             streamClosedCleanly = false;
+                             finalizeChat(true); // Pass true for error
+                             return;
+                         }
+                         if (data.token !== undefined && data.token !== null) {
+                             fullResponse += data.token;
+                             // Update content by parsing the accumulated response
+                             contentDiv.innerHTML = marked.parse(fullResponse);
+                             scrollToBottom(); // Scroll smoothly as content arrives
+                         }
+                     };
+                     eventSource.onerror = (error) => {
+                         console.error('SSE connection error event:', error);
+                         // Remove indicator on error
+                         if (typingIndicatorElement && typingIndicatorElement.parentNode) {
+                             typingIndicatorElement.parentNode.removeChild(typingIndicatorElement);
+                             typingIndicatorElement = null;
+                         }
+                         // Don't show generic error if we received data and the stream likely just closed normally
+                         if (!fullResponse && !streamClosedCleanly) { // Only show error if nothing received AND not already cleanly closed
+                             const errorMsg = "⚠️ Connection error. Please try again.";
+                             if (contentDiv) {
+                                 contentDiv.innerHTML = `<p class="text-red-500 font-semibold">${errorMsg}</p>`;
+                             } else {
+                                 // Fallback if container wasn't created somehow
+                                 appendMessage(errorMsg, 'bot', currentModelInfo); // Pass model info here too
+                             }
+                              streamClosedCleanly = false;
+                         } else if (!streamClosedCleanly) {
+                              // If we received data, assume it's a normal closure misinterpreted as error
+                              console.log("SSE connection closed (likely normal end detected by onerror).");
+                              streamClosedCleanly = true; // Mark as clean closure NOW
+                         } else {
+                              console.log("SSE onerror event after stream already marked cleanly closed.")
+                         }
+                         finalizeChat(!streamClosedCleanly); // Finalize, indicate error if not clean
+                     };
+                 } catch (error) {
+                     // For setup errors before SSE starts
+                     console.error('Chat setup error:', error);
+                      // Remove indicator on setup error
+                     if (typingIndicatorElement && typingIndicatorElement.parentNode) {
+                         typingIndicatorElement.parentNode.removeChild(typingIndicatorElement);
+                         typingIndicatorElement = null;
+                     }
+                      if (botMessageContainer && contentDiv) {
+                          contentDiv.innerHTML = `<p class="text-red-500 font-semibold">⚠️ Error starting chat: ${error.message}</p>`;
+                     } else {
+                         appendMessage(`Error starting chat: ${error.message}`, 'bot', currentModelInfo); // Pass model info
+                     }
+                      finalizeChat(true);
+                 }
+             });
+             // --- UI Helper Functions ---
+             function appendMessage(text, sender, modelInfo = null) {
+                 const messageWrapper = document.createElement('div');
+                 const iconSVG = sender === 'user'
+                     ? `<div class="bg-blue-200 dark:bg-gray-700 p-2.5 rounded-full flex-shrink-0 mt-1 self-start"><svg class="w-5 h-5 text-blue-700 dark:text-blue-300" viewBox="0 0 24 24"><path fill="currentColor" d="M12 12c2.21 0 4-1.79 4-4s-1.79-4-4-4-4 1.79-4 4 1.79 4 4 4zm0 2c-2.67 0-8 1.34-8 4v2h16v-2c0-2.66-5.33-4-8-4z"></path></svg></div>`
+                     : `<div class="bg-gray-200 dark:bg-gray-700 rounded-full flex-shrink-0 mt-1 self-start text-xl flex items-center justify-center w-10 h-10">✨</div>`;
+                 let senderHTML;
+                 if (sender === 'user') {
+                     senderHTML = '<p class="font-medium text-sm mb-1">You</p>';
+                 } else {
+                     let modelInfoHTML = '';
+                     const displayInfo = modelInfo || currentModelInfo;
+                     if (displayInfo && displayInfo.simpleModelName) {
+                         modelInfoHTML = `
+                             <span class="ml-2 text-xs font-normal text-gray-400">
+                                 (Model: ${displayInfo.simpleModelName} | Mode: ${displayInfo.mode})
+                             </span>
+                         `;
+                     }
+                     senderHTML = `<div class="font-medium text-sm mb-1 flex items-center">CogniChat ${modelInfoHTML}</div>`;
+                 }
+                 messageWrapper.className = `flex items-start gap-3`;
+                 // Ensure markdown-content div exists even if text is empty for the indicator
+                 messageWrapper.innerHTML = `
+                     ${iconSVG}
+                     <div class="flex-1 pt-1 min-w-0"> ${senderHTML}
+                         <div class="text-base markdown-content prose dark:prose-invert max-w-none">${text ? marked.parse(text) : ''}</div>
+                         <div class="tts-controls mt-2"></div>
+                     </div>
+                 `;
+                 chatContent.appendChild(messageWrapper);
+                 // Force scroll only when adding user message or initial bot message with content
+                 if (sender === 'user' || text) {
+                     scrollToBottom(true);
+                 }
+                 // Return the container that holds the sender name and content div
+                 return messageWrapper.querySelector('.flex-1');
+             }
+             // --- UPDATED showTypingIndicator ---
+             function showTypingIndicator() {
+                 const indicator = document.createElement('div');
+                 indicator.className = 'typing-indicator'; // Use the main class
+                 indicator.innerHTML = '<span></span><span></span><span></span>';
+                 // Don't append here, just return the element
+                 return indicator;
+             }
+             // --- End UPDATED showTypingIndicator ---
+            function scrollToBottom(force = false) {
+                 const isNearBottom = chatWindow.scrollHeight - chatWindow.clientHeight <= chatWindow.scrollTop + 150; // Threshold
+                 if (force || isNearBottom) {
+                     requestAnimationFrame(() => { // Use rAF for smoother render loop
+                         chatWindow.scrollTo({
+                             top: chatWindow.scrollHeight,
+                             behavior: 'smooth'
+                         });
+                     });
+                 }
+             }
+             function addCopyButton(pre) {
+                 if (pre.querySelector('.copy-code-btn')) return;
+                 const button = document.createElement('button');
+                 // Updated classes for better styling
+                 button.className = 'copy-code-btn absolute top-2 right-2 p-1 rounded bg-[var(--copy-btn-bg)] text-[var(--copy-btn-text)] hover:bg-[var(--copy-btn-hover-bg)] transition-opacity duration-200 flex items-center gap-1 text-xs';
+                 button.innerHTML = `<svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M8 16H6a2 2 0 01-2-2V6a2 2 0 012-2h8a2 2 0 012 2v2m-6 12h8a2 2 0 002-2v-8a2 2 0 00-2-2h-8a2 2 0 00-2 2v8a2 2 0 002 2z"></path></svg> Copy`;
+                 pre.style.position = 'relative'; // Ensure parent is relative for absolute positioning
+                 pre.appendChild(button);
+                 button.addEventListener('click', () => {
+                     const code = pre.querySelector('code')?.innerText || '';
+                     navigator.clipboard.writeText(code)
+                         .then(() => {
+                             button.textContent = 'Copied!';
+                             setTimeout(() => button.innerHTML = `<svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M8 16H6a2 2 0 01-2-2V6a2 2 0 012-2h8a2 2 0 012 2v2m-6 12h8a2 2 0 002-2v-8a2 2 0 00-2-2h-8a2 2 0 00-2 2v8a2 2 0 002 2z"></path></svg> Copy`, 1500);
+                         })
+                         .catch(err => {
+                             console.error('Failed to copy code: ', err);
+                             button.textContent = 'Error';
+                             setTimeout(() => button.innerHTML = `<svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M8 16H6a2 2 0 01-2-2V6a2 2 0 012-2h8a2 2 0 012 2v2m-6 12h8a2 2 0 002-2v-8a2 2 0 00-2-2h-8a2 2 0 00-2 2v8a2 2 0 002 2z"></path></svg> Copy`, 1500);
+                         });
+                 });
+             }
+             // --- TTS Functions (UPDATED FOR SPEED CYCLE) ---
+             // Shared text-to-speech state: at most one Audio object and one active button
+             // are tracked at a time; both are null when nothing is loaded.
+             let currentAudio = null;
+             let currentPlayingButton = null; // Stores the currently active *play/pause* button
+             // Icon markup inserted into the Listen / Pause button labels.
+             const playIconSVG = `<svg class="w-4 h-4" fill="currentColor" viewBox="0 0 20 20"><path d="M6.3 2.841A1.5 1.5 0 004 4.11V15.89a1.5 1.5 0 002.3 1.269l9.344-5.89a1.5 1.5 0 000-2.538L6.3 2.84z"></path></svg>`;
+             const pauseIconSVG = `<svg class="w-4 h-4" fill="currentColor" viewBox="0 0 20 20"><path d="M5.75 4.75a.75.75 0 00-.75.75v9.5c0 .414.336.75.75.75h1.5a.75.75 0 00.75-.75v-9.5a.75.75 0 00-.75-.75h-1.5zm6.5 0a.75.75 0 00-.75.75v9.5c0 .414.336.75.75.75h1.5a.75.75 0 00.75-.75v-9.5a.75.75 0 00-.75-.75h-1.5z"></path></svg>`;
+             // Playback rates in the order the speed button cycles through them.
+             const availableSpeeds = [1.0, 1.5, 0.75]; // Normal, Fast, Slow
+             // --- UPDATED: addTextToSpeechControls ---
+             function addTextToSpeechControls(messageBubble, text) {
+                 if (!text || !text.trim()) return;
+                 const ttsControls = messageBubble.querySelector('.tts-controls');
+                 if (!ttsControls || ttsControls.querySelector('.speak-btn')) return; // Avoid adding duplicates
+                 // Play/Pause Button
+                 const speakButton = document.createElement('button');
+                 speakButton.className = 'speak-btn mt-2 px-3 py-1.5 bg-blue-700 text-white rounded-full text-xs font-medium hover:bg-blue-800 transition-colors flex items-center gap-1.5 disabled:opacity-50 disabled:cursor-not-allowed';
+                 speakButton.title = 'Listen to this message';
+                 speakButton.innerHTML = `${playIconSVG} <span>Listen</span>`;
+                 speakButton.setAttribute('data-current-speed', '1.0'); // Store current speed
+                 ttsControls.appendChild(speakButton);
+                 speakButton.addEventListener('click', () => handleTTS(text, speakButton));
+                 // Speed Cycle Button
+                 const speedButton = document.createElement('button');
+                 speedButton.className = 'speed-cycle-btn'; // Use new class for styling
+                 speedButton.title = 'Cycle playback speed';
+                 speedButton.textContent = 'Speed: 1x';
+                 speedButton.setAttribute('data-speeds', JSON.stringify(availableSpeeds)); // Store speeds
+                 ttsControls.appendChild(speedButton);
+                 speedButton.addEventListener('click', () => cycleSpeed(speedButton, speakButton));
+             }
+             // --- NEW: cycleSpeed ---
+             function cycleSpeed(speedBtn, speakBtn) {
+                 const speeds = JSON.parse(speedBtn.getAttribute('data-speeds'));
+                 let currentSpeed = parseFloat(speakBtn.getAttribute('data-current-speed'));
+                 let currentIndex = speeds.indexOf(currentSpeed);
+                 // Find next speed index, looping back to 0
+                 let nextIndex = (currentIndex + 1) % speeds.length;
+                 let nextSpeed = speeds[nextIndex];
+                 // Update speak button's data attribute and speed button's text
+                 speakBtn.setAttribute('data-current-speed', nextSpeed.toString());
+                 speedBtn.textContent = `Speed: ${nextSpeed}x`;
+                 // If audio is currently playing and this is the active button, update playback rate
+                 if (currentAudio && !currentAudio.paused && speakBtn === currentPlayingButton) {
+                     currentAudio.playbackRate = nextSpeed;
+                 }
+             }
+             // --- UPDATED: handleTTS ---
+             async function handleTTS(text, button) {
+                 if (!text || !text.trim()) return;
+                 // *** Get speed from the button's data attribute ***
+                 const selectedSpeed = parseFloat(button.getAttribute('data-current-speed')) || 1.0;
+                 if (button === currentPlayingButton) { // If clicking the currently active play/pause button
+                     if (currentAudio && !currentAudio.paused) { // If playing, pause it
+                         currentAudio.pause();
+                         button.innerHTML = `${playIconSVG} <span>Listen</span>`;
+                     } else if (currentAudio && currentAudio.paused) { // If paused, resume it
+                         currentAudio.playbackRate = selectedSpeed; // Ensure speed is set on resume
+                         currentAudio.play().catch(e => {console.error("Audio resume error:", e); resetAllSpeakButtons();});
+                         button.innerHTML = `${pauseIconSVG} <span>Pause</span>`;
+                     }
+                     return;
+                 }
+                 // If clicking a new play button (or the first time)
+                 resetAllSpeakButtons(); // Stop any other audio
+                 currentPlayingButton = button; // Mark this button as active
+                 button.innerHTML = `<div class="tts-button-loader mr-1"></div> <span>Loading...</span>`;
+                 button.disabled = true;
+                 // Disable the corresponding speed button while loading
+                 const speedBtn = button.parentElement.querySelector('.speed-cycle-btn');
+                 if(speedBtn) speedBtn.disabled = true;
+                 try {
+                     const response = await fetch('/tts', {
+                         method: 'POST',
+                         headers: { 'Content-Type': 'application/json' },
+                         body: JSON.stringify({ text: text })
+                     });
+                     if (!response.ok) throw new Error(`TTS generation failed (${response.status})`);
+                     const blob = await response.blob();
+                     if (!blob || blob.size === 0) throw new Error("Received empty audio blob.");
+                     const audioUrl = URL.createObjectURL(blob);
+                     currentAudio = new Audio(audioUrl);
+                     // *** Set the playback speed HERE ***
+                     currentAudio.playbackRate = selectedSpeed;
+                     await currentAudio.play();
+                     button.innerHTML = `${pauseIconSVG} <span>Pause</span>`;
+                     button.disabled = false;
+                     // Re-enable the speed button
+                     if(speedBtn) speedBtn.disabled = false;
+                     currentAudio.onended = () => {
+                         // Only reset if this button was the one playing
+                         if (button === currentPlayingButton) resetAllSpeakButtons();
+                     };
+                     currentAudio.onerror = (e) => {
+                         console.error('Audio object error:', e);
+                         alert('Error playing audio.');
+                         resetAllSpeakButtons();
+                     };
+                 } catch (error) {
+                     console.error('TTS Handling Error:', error);
+                     alert(`Failed to play audio: ${error.message}`);
+                     resetAllSpeakButtons(); // Reset on error
+                 }
+             }
+             // --- UPDATED: resetAllSpeakButtons ---
+             /**
+              * Return all TTS controls to their idle state and stop playback.
+              *
+              * Every '.speak-btn' gets the Listen label, is re-enabled and has its stored
+              * speed set back to '1.0'; every '.speed-cycle-btn' shows 'Speed: 1x' and is
+              * re-enabled. The current audio (if any) is paused, its handlers are detached,
+              * and both currentAudio and currentPlayingButton are cleared.
+              */
+             function resetAllSpeakButtons() {
+                 document.querySelectorAll('.speak-btn').forEach(btn => {
+                     btn.innerHTML = `${playIconSVG} <span>Listen</span>`;
+                     btn.disabled = false;
+                     btn.setAttribute('data-current-speed', '1.0'); // Reset speed attribute
+                 });
+                 document.querySelectorAll('.speed-cycle-btn').forEach(btn => {
+                    btn.textContent = 'Speed: 1x'; // Reset speed button text
                     btn.disabled = false;
+                 });
+                 if (currentAudio) {
+                     currentAudio.pause();
+                     // Detach handlers so the discarded audio cannot trigger another reset.
+                     currentAudio.onended = null; // Clean up listeners
+                     currentAudio.onerror = null;
+                     currentAudio = null;
+                 }
+                 currentPlayingButton = null;
+             }
+             // Remove the resetSpecificButton function if it exists, it's integrated now.
+             // --- End of TTS Functions ---
+             // ... (keep the optional highlight.js part if you have it)
+         });
+     </script>
+ </body>
+ </html>