HYPERXD committed on
Commit
a939b00
Β·
1 Parent(s): 7f5c2f3
Files changed (4) hide show
  1. app.py +61 -17
  2. new_test.py +55 -0
  3. rag_processor.py +1 -0
  4. readme.md +556 -0
app.py CHANGED
@@ -6,14 +6,13 @@ import uuid
6
  from flask import Flask, request, render_template, session, jsonify, Response, stream_with_context
7
  from werkzeug.utils import secure_filename
8
  from rag_processor import create_rag_chain
 
9
 
10
- # ============================ ADDITIONS START ============================
11
  from gtts import gTTS
12
  import io
13
- import re # <-- Import the regular expression module
14
- # ============================ ADDITIONS END ==============================
15
 
16
- # Document Loaders
17
  from langchain_community.document_loaders import (
18
  TextLoader,
19
  PyPDFLoader,
@@ -22,28 +21,56 @@ from langchain_community.document_loaders import (
22
 
23
  # Additional imports for robust PDF handling
24
  from langchain_core.documents import Document
25
- import fitz # PyMuPDF for alternative PDF processing
26
 
27
  # Text Splitter, Embeddings, Retrievers
28
  from langchain.text_splitter import RecursiveCharacterTextSplitter
29
- from langchain_community.embeddings import HuggingFaceEmbeddings
30
  from langchain_community.vectorstores import FAISS
31
- from langchain.retrievers import EnsembleRetriever
 
32
  from langchain_community.retrievers import BM25Retriever
33
  from langchain_community.chat_message_histories import ChatMessageHistory
 
 
 
34
 
35
- # --- Basic Flask App Setup ---
36
  app = Flask(__name__)
37
  app.config['SECRET_KEY'] = os.urandom(24)
38
 
39
- # Use /tmp directory for uploads in HF Spaces (writable), fallback to local uploads for development
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  is_hf_spaces = bool(os.getenv("SPACE_ID") or os.getenv("SPACES_ZERO_GPU"))
41
  if is_hf_spaces:
42
  app.config['UPLOAD_FOLDER'] = '/tmp/uploads'
43
  else:
44
  app.config['UPLOAD_FOLDER'] = 'uploads'
45
 
46
- # Create upload directory with proper error handling
47
  try:
48
  os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
49
  print(f"βœ“ Upload folder ready: {app.config['UPLOAD_FOLDER']}")
@@ -54,21 +81,23 @@ except Exception as e:
54
  os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
55
  print(f"βœ“ Using fallback upload folder: {app.config['UPLOAD_FOLDER']}")
56
 
57
- # --- In-memory Storage & Global Model Loading ---
 
 
 
58
  rag_chains = {}
59
  message_histories = {}
60
 
61
- # Load the embedding model once when the application starts for efficiency.
62
  print("Loading embedding model...")
63
 
64
- # Set environment variables for HuggingFace cache (use home directory if available)
65
  cache_base = os.path.expanduser("~/.cache") if os.path.expanduser("~") != "~" else "/tmp/hf_cache"
66
  os.environ.setdefault('HF_HOME', f'{cache_base}/huggingface')
67
  os.environ.setdefault('HF_HUB_CACHE', f'{cache_base}/huggingface/hub')
68
  os.environ.setdefault('TRANSFORMERS_CACHE', f'{cache_base}/transformers')
69
  os.environ.setdefault('SENTENCE_TRANSFORMERS_HOME', f'{cache_base}/sentence_transformers')
70
 
71
- # Create cache directories with proper permissions
72
  cache_dirs = [
73
  os.environ['HF_HOME'],
74
  os.environ['HF_HUB_CACHE'],
@@ -103,6 +132,8 @@ for cache_dir in cache_dirs:
103
  except Exception as e:
104
  print(f"Warning: Could not create {cache_dir}: {e}")
105
 
 
 
106
  # Try loading embedding model with error handling and fallbacks
107
  try:
108
  print("Attempting to load embedding model...")
@@ -135,6 +166,13 @@ except Exception as e:
135
  print(f"Final attempt failed: {e3}")
136
  # Use a simpler fallback model or raise the error
137
  raise Exception(f"Could not load any embedding model. Last error: {e3}")
 
 
 
 
 
 
 
138
 
139
  def load_pdf_with_fallback(filepath):
140
  """
@@ -336,12 +374,19 @@ def upload_files():
336
  retrievers=[bm25_retriever, faiss_retriever],
337
  weights=[0.5, 0.5]
338
  )
 
 
 
 
 
 
 
339
 
340
  session_id = str(uuid.uuid4())
341
  print(f"Creating RAG chain for session {session_id}...")
342
 
343
  try:
344
- rag_chain = create_rag_chain(ensemble_retriever, get_session_history)
345
  rag_chains[session_id] = rag_chain
346
  print(f"βœ“ RAG chain created successfully for session {session_id} with {len(processed_files)} documents.")
347
  except Exception as rag_error:
@@ -443,7 +488,6 @@ def chat():
443
  print(f"Error during chat invocation: {e}")
444
  return Response("An error occurred while getting the answer.", status=500, mimetype='text/plain')
445
 
446
- # ============================ ADDITIONS START ============================
447
 
448
  def clean_markdown_for_tts(text: str) -> str:
449
  """Removes markdown formatting for cleaner text-to-speech output."""
@@ -484,7 +528,7 @@ def text_to_speech():
484
  except Exception as e:
485
  print(f"Error in TTS generation: {e}")
486
  return jsonify({'status': 'error', 'message': 'Failed to generate audio.'}), 500
487
- # ============================ ADDITIONS END ==============================
488
 
489
 
490
  @app.route('/debug', methods=['GET'])
 
6
  from flask import Flask, request, render_template, session, jsonify, Response, stream_with_context
7
  from werkzeug.utils import secure_filename
8
  from rag_processor import create_rag_chain
9
+ from typing import Sequence, Any
10
 
 
11
  from gtts import gTTS
12
  import io
13
+ import re
14
+
15
 
 
16
  from langchain_community.document_loaders import (
17
  TextLoader,
18
  PyPDFLoader,
 
21
 
22
  # Additional imports for robust PDF handling
23
  from langchain_core.documents import Document
24
+ import fitz
25
 
26
  # Text Splitter, Embeddings, Retrievers
27
  from langchain.text_splitter import RecursiveCharacterTextSplitter
28
+ from langchain_huggingface import HuggingFaceEmbeddings
29
  from langchain_community.vectorstores import FAISS
30
+ from langchain.retrievers import EnsembleRetriever, ContextualCompressionRetriever
31
+ from langchain.retrievers.document_compressors.base import BaseDocumentCompressor
32
  from langchain_community.retrievers import BM25Retriever
33
  from langchain_community.chat_message_histories import ChatMessageHistory
34
+ from sentence_transformers.cross_encoder import CrossEncoder
35
+ import numpy as np
36
+
37
 
 
38
  app = Flask(__name__)
39
  app.config['SECRET_KEY'] = os.urandom(24)
40
 
41
+
42
class LocalReranker(BaseDocumentCompressor):
    """Cross-encoder re-ranker that keeps only the ``top_n`` most relevant documents.

    Scores each (query, passage) pair with a locally loaded
    sentence-transformers ``CrossEncoder`` and returns the candidates sorted
    by descending relevance score, truncated to ``top_n``.
    """

    model: Any  # a sentence_transformers CrossEncoder (any object exposing .predict)
    top_n: int = 5  # how many documents to keep after re-ranking

    class Config:
        # CrossEncoder is not a pydantic-native type; allow it as an opaque field.
        arbitrary_types_allowed = True

    def compress_documents(
        self,
        documents: Sequence[Document],
        query: str,
        callbacks=None,
    ) -> Sequence[Document]:
        """Return the ``top_n`` documents most relevant to ``query``.

        Args:
            documents: Candidate documents from the base retriever.
            query: The user query to score the candidates against.
            callbacks: Unused; present to satisfy the compressor interface.
        """
        if not documents:
            return []

        # One (query, passage) pair per candidate for the cross-encoder.
        pairs = [[query, doc.page_content] for doc in documents]
        scores = self.model.predict(pairs, show_progress_bar=False)

        # Highest score first, then keep only the top_n documents.
        ranked = sorted(zip(documents, scores), key=lambda ds: ds[1], reverse=True)
        return [doc for doc, _score in ranked[: self.top_n]]
65
+
66
+
67
  is_hf_spaces = bool(os.getenv("SPACE_ID") or os.getenv("SPACES_ZERO_GPU"))
68
  if is_hf_spaces:
69
  app.config['UPLOAD_FOLDER'] = '/tmp/uploads'
70
  else:
71
  app.config['UPLOAD_FOLDER'] = 'uploads'
72
 
73
+
74
  try:
75
  os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
76
  print(f"βœ“ Upload folder ready: {app.config['UPLOAD_FOLDER']}")
 
81
  os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
82
  print(f"βœ“ Using fallback upload folder: {app.config['UPLOAD_FOLDER']}")
83
 
84
+
85
+
86
+
87
+
88
  rag_chains = {}
89
  message_histories = {}
90
 
 
91
  print("Loading embedding model...")
92
 
93
+
94
  cache_base = os.path.expanduser("~/.cache") if os.path.expanduser("~") != "~" else "/tmp/hf_cache"
95
  os.environ.setdefault('HF_HOME', f'{cache_base}/huggingface')
96
  os.environ.setdefault('HF_HUB_CACHE', f'{cache_base}/huggingface/hub')
97
  os.environ.setdefault('TRANSFORMERS_CACHE', f'{cache_base}/transformers')
98
  os.environ.setdefault('SENTENCE_TRANSFORMERS_HOME', f'{cache_base}/sentence_transformers')
99
 
100
+
101
  cache_dirs = [
102
  os.environ['HF_HOME'],
103
  os.environ['HF_HUB_CACHE'],
 
132
  except Exception as e:
133
  print(f"Warning: Could not create {cache_dir}: {e}")
134
 
135
+
136
+
137
  # Try loading embedding model with error handling and fallbacks
138
  try:
139
  print("Attempting to load embedding model...")
 
166
  print(f"Final attempt failed: {e3}")
167
  # Use a simpler fallback model or raise the error
168
  raise Exception(f"Could not load any embedding model. Last error: {e3}")
169
+
170
+
171
+
172
+ print("Loading local re-ranking model...")
173
+ RERANKER_MODEL = CrossEncoder("mixedbread-ai/mxbai-rerank-xsmall-v1", device='cpu')
174
+ print("Re-ranking model loaded successfully.")
175
+
176
 
177
  def load_pdf_with_fallback(filepath):
178
  """
 
374
  retrievers=[bm25_retriever, faiss_retriever],
375
  weights=[0.5, 0.5]
376
  )
377
+ reranker = LocalReranker(model=RERANKER_MODEL, top_n=3)
378
+
379
+ compression_retriever = ContextualCompressionRetriever(
380
+ base_compressor=reranker,
381
+ base_retriever=ensemble_retriever
382
+ )
383
+
384
 
385
  session_id = str(uuid.uuid4())
386
  print(f"Creating RAG chain for session {session_id}...")
387
 
388
  try:
389
+ rag_chain = create_rag_chain(compression_retriever, get_session_history)
390
  rag_chains[session_id] = rag_chain
391
  print(f"βœ“ RAG chain created successfully for session {session_id} with {len(processed_files)} documents.")
392
  except Exception as rag_error:
 
488
  print(f"Error during chat invocation: {e}")
489
  return Response("An error occurred while getting the answer.", status=500, mimetype='text/plain')
490
 
 
491
 
492
  def clean_markdown_for_tts(text: str) -> str:
493
  """Removes markdown formatting for cleaner text-to-speech output."""
 
528
  except Exception as e:
529
  print(f"Error in TTS generation: {e}")
530
  return jsonify({'status': 'error', 'message': 'Failed to generate audio.'}), 500
531
+
532
 
533
 
534
  @app.route('/debug', methods=['GET'])
new_test.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI, HTTPException, Query, Body
from typing import Optional
import os
import httpx

app = FastAPI()

# Vapi analytics endpoint used by the /assistant_analytics proxy route.
VAPI_API_URL = "https://api.vapi.ai/analytics"
# SECURITY: never commit API tokens to source control — the previous revision
# embedded a live token here. Read it from the environment instead; an empty
# default makes the upstream call fail with 401 rather than leaking a secret.
VAPI_TOKEN = os.environ.get("VAPI_TOKEN", "")
10
def build_analytics_payload(start: str, end: str):
    """Build the Vapi analytics request body for the given ISO time range.

    Args:
        start: Range start as an ISO date-time string (e.g. ``2025-10-01T00:00:00Z``).
        end: Range end as an ISO date-time string.

    Returns:
        A dict in the shape Vapi's analytics API expects, grouping call
        duration/cost aggregates per assistant.
    """
    aggregate_ops = [
        {"operation": "avg", "column": "duration", "alias": "average_duration"},
        {"operation": "sum", "column": "duration", "alias": "total_duration"},
        {"operation": "sum", "column": "cost", "alias": "total_cost"},
        {"operation": "count", "column": "id", "alias": "call_count"},
    ]
    analytics_query = {
        "table": "call",
        "name": "assistant_analytics",
        "operations": aggregate_ops,
        "groupBy": ["assistantId"],
        "timeRange": {"start": start, "end": end},
    }
    return {"queries": [analytics_query]}
30
+
31
@app.post("/assistant_analytics")
async def assistant_analytics(
    start: Optional[str] = Query(None, description="Start date-time in ISO format, e.g. 2025-10-01T00:00:00Z"),
    end: Optional[str] = Query(None, description="End date-time in ISO format, e.g. 2025-10-14T23:59:59Z"),
    body: Optional[dict] = Body(None)
):
    """Proxy endpoint: fetch per-assistant call analytics from the Vapi API.

    The time range may be supplied either as query parameters or in the JSON
    body (``{"start": ..., "end": ...}``); body values take precedence when
    both are present.

    Raises:
        HTTPException: 400 when no complete time range is provided, or the
            upstream status/detail when Vapi returns a non-200 response.
    """
    # Allow the caller to provide start/end in the POST body instead of the query.
    if body and "start" in body and "end" in body:
        start = body["start"]
        end = body["end"]
    if not start or not end:
        raise HTTPException(status_code=400, detail="Must provide start and end times either as query or in body.")

    payload = build_analytics_payload(start, end)
    # Vapi authenticates with a Bearer token; the previous code sent the bare
    # token, which the API rejects. Tolerate tokens already carrying the prefix.
    auth_header = VAPI_TOKEN if VAPI_TOKEN.startswith("Bearer ") else f"Bearer {VAPI_TOKEN}"
    headers = {
        "Authorization": auth_header,
        "Content-Type": "application/json"
    }

    async with httpx.AsyncClient() as client:
        response = await client.post(VAPI_API_URL, headers=headers, json=payload)
    if response.status_code == 200:
        return response.json()
    # Surface the upstream error verbatim so the caller can diagnose it.
    raise HTTPException(status_code=response.status_code, detail=response.text)
rag_processor.py CHANGED
@@ -83,6 +83,7 @@ Standalone Question:"""
83
  rag_template = """You are an expert assistant named `Cognichat`.Whenver user ask you about who you are , simply say you are `Cognichat`.
84
  You are developed by Ritesh and Alish.
85
  Your job is to provide accurate and helpful answers based ONLY on the provided context.
 
86
  If the information is not in the context, clearly state that you don't know the answer.
87
  Provide a clear and concise answer.
88
 
 
83
  rag_template = """You are an expert assistant named `Cognichat`.Whenver user ask you about who you are , simply say you are `Cognichat`.
84
  You are developed by Ritesh and Alish.
85
  Your job is to provide accurate and helpful answers based ONLY on the provided context.
86
+ Whatever the user ask,it is always about the document so based on the document only provide the answer.
87
  If the information is not in the context, clearly state that you don't know the answer.
88
  Provide a clear and concise answer.
89
 
readme.md ADDED
@@ -0,0 +1,556 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # πŸ€– CogniChat - Intelligent Document Chat System
2
+
3
+ <div align="center">
4
+
5
+ ![License](https://img.shields.io/badge/license-MIT-blue.svg)
6
+ ![Python](https://img.shields.io/badge/python-3.9+-brightgreen.svg)
7
+ ![Docker](https://img.shields.io/badge/docker-ready-blue.svg)
8
+ ![HuggingFace](https://img.shields.io/badge/πŸ€—-Spaces-yellow.svg)
9
+
10
+ **Transform your documents into interactive conversations powered by advanced RAG technology**
11
+
12
+ <p align="center">
13
+ <img src="Document_reader.gif" width="100%" alt="CogniChat Demo">
14
+ </p>
15
+
16
+ [Features](#-features) β€’ [Quick Start](#-quick-start) β€’ [Architecture](#-architecture) β€’ [Deployment](#-deployment) β€’ [API](#-api-reference)
17
+
18
+ </div>
19
+
20
+ ---
21
+
22
+ ## πŸ“‹ Table of Contents
23
+
24
+ - [Overview](#-overview)
25
+ - [Features](#-features)
26
+ - [Architecture](#-architecture)
27
+ - [Technology Stack](#-technology-stack)
28
+ - [Quick Start](#-quick-start)
29
+ - [Deployment](#-deployment)
30
+ - [Configuration](#-configuration)
31
+ - [API Reference](#-api-reference)
32
+ - [Troubleshooting](#-troubleshooting)
33
+ - [Contributing](#-contributing)
34
+ - [License](#-license)
35
+
36
+ ---
37
+
38
+ ## 🎯 Overview
39
+
40
+ CogniChat is a production-ready, intelligent document chat application that leverages **Retrieval Augmented Generation (RAG)** to enable natural conversations with your documents. Built with enterprise-grade technologies, it provides accurate, context-aware responses from your document corpus.
41
+
42
+ ### Why CogniChat?
43
+
44
+
45
+ - **πŸ”‰ Audio Overview of Your document**:Simply ask the question and listen the audio. Now your document can speak with you.
46
+ - **🎯 Accurate Retrieval**: Hybrid search combining BM25 and FAISS for optimal results
47
+ - **πŸ’¬ Conversational Memory**: Maintains context across multiple interactions
48
+ - **πŸ“„ Multi-Format Support**: Handles PDF, DOCX, TXT, and image files
49
+ - **πŸš€ Production Ready**: Docker support, comprehensive error handling, and security best practices
50
+ - **🎨 Modern UI**: Responsive design with dark mode and real-time streaming
51
+
52
+ ---
53
+
54
+ ## ✨ Features
55
+
56
+ ### Core Capabilities
57
+
58
+ | Feature | Description |
59
+ |---------|-------------|
60
+ | **Multi-Format Processing** | Upload and process PDF, DOCX, TXT, and image files |
61
+ | **Hybrid Search** | Combines BM25 (keyword) and FAISS (semantic) for superior retrieval |
62
+ | **Conversational AI** | Powered by Groq's Llama 3.1 for intelligent responses |
63
+ | **Memory Management** | Maintains chat history for contextual conversations |
64
+ | **Text-to-Speech** | Built-in TTS for audio playback of responses |
65
+ | **Streaming Responses** | Real-time token streaming for better UX |
66
+ | **Document Chunking** | Intelligent text splitting for optimal context windows |
67
+
68
+ ### Advanced Features
69
+
70
+ - **Semantic Embeddings**: HuggingFace `all-MiniLM-L6-v2` for accurate vector representations
71
+ - **Reranking**: Contextual compression for improved relevance
72
+ - **Error Handling**: Comprehensive fallback mechanisms and error recovery
73
+ - **Security**: Non-root Docker execution and environment-based secrets
74
+ - **Scalability**: Optimized for both local and cloud deployments
75
+
76
+ ---
77
+
78
+ ## πŸ— Architecture
79
+
80
+ ### RAG Pipeline Overview
81
+
82
+ ```mermaid
83
+ graph TB
84
+ A[Document Upload] --> B[Document Processing]
85
+ B --> C[Text Extraction]
86
+ C --> D[Chunking Strategy]
87
+ D --> E[Embedding Generation]
88
+ E --> F[Vector Store FAISS]
89
+
90
+ G[User Query] --> H[Query Embedding]
91
+ H --> I[Hybrid Retrieval]
92
+
93
+ F --> I
94
+ J[BM25 Index] --> I
95
+
96
+ I --> K[Reranking]
97
+ K --> L[Context Assembly]
98
+ L --> M[LLM Groq Llama 3.1]
99
+ M --> N[Response Generation]
100
+ N --> O[Streaming Output]
101
+
102
+ P[Chat History] --> M
103
+ N --> P
104
+
105
+ style A fill:#e1f5ff
106
+ style G fill:#e1f5ff
107
+ style F fill:#ffe1f5
108
+ style J fill:#ffe1f5
109
+ style M fill:#f5e1ff
110
+ style O fill:#e1ffe1
111
+ ```
112
+
113
+ ### System Architecture
114
+
115
+ ```mermaid
116
+ graph LR
117
+ A[Client Browser] -->|HTTP/WebSocket| B[Flask Server]
118
+ B --> C[Document Processor]
119
+ B --> D[RAG Engine]
120
+ B --> E[TTS Service]
121
+
122
+ C --> F[(File Storage)]
123
+ D --> G[(FAISS Vector DB)]
124
+ D --> H[(BM25 Index)]
125
+ D --> I[Groq API]
126
+
127
+ J[HuggingFace Models] --> D
128
+
129
+ style B fill:#4a90e2
130
+ style D fill:#e24a90
131
+ style I fill:#90e24a
132
+ ```
133
+
134
+ ### Data Flow
135
+
136
+ 1. **Document Ingestion**: Files are uploaded and validated
137
+ 2. **Processing Pipeline**: Text extraction β†’ Chunking β†’ Embedding
138
+ 3. **Indexing**: Dual indexing (FAISS + BM25) for hybrid search
139
+ 4. **Query Processing**: User queries are embedded and searched
140
+ 5. **Retrieval**: Top-k relevant chunks retrieved using hybrid approach
141
+ 6. **Generation**: LLM generates contextual responses with citations
142
+ 7. **Streaming**: Responses streamed back to client in real-time
143
+
144
+ ---
145
+
146
+ ## πŸ›  Technology Stack
147
+
148
+ ### Backend
149
+
150
+ | Component | Technology | Purpose |
151
+ |-----------|-----------|---------|
152
+ | **Framework** | Flask 2.3+ | Web application framework |
153
+ | **RAG** | LangChain | RAG pipeline orchestration |
154
+ | **Vector DB** | FAISS | Fast similarity search |
155
+ | **Keyword Search** | BM25 | Sparse retrieval |
156
+ | **LLM** | Groq Llama 3.1 | Response generation |
157
+ | **Embeddings** | HuggingFace Transformers | Semantic embeddings |
158
+ | **Doc Processing** | Unstructured, PyPDF, python-docx | Multi-format parsing |
159
+
160
+ ### Frontend
161
+
162
+ | Component | Technology |
163
+ |-----------|-----------|
164
+ | **UI Framework** | TailwindCSS |
165
+ | **JavaScript** | Vanilla ES6+ |
166
+ | **Icons** | Font Awesome |
167
+ | **Markdown** | Marked.js |
168
+
169
+ ### Infrastructure
170
+
171
+ - **Containerization**: Docker + Docker Compose
172
+ - **Deployment**: HuggingFace Spaces, local, cloud-agnostic
173
+ - **Security**: Environment-based secrets, non-root execution
174
+
175
+ ---
176
+
177
+ ## πŸš€ Quick Start
178
+
179
+ ### Prerequisites
180
+
181
+ - Python 3.9+
182
+ - Docker (optional, recommended)
183
+ - Groq API Key ([Get one here](https://console.groq.com/keys))
184
+
185
+ ### Installation Methods
186
+
187
+ #### 🐳 Method 1: Docker (Recommended)
188
+
189
+ ```bash
190
+ # Clone the repository
191
+ git clone https://github.com/RautRitesh/Chat-with-docs
192
+ cd cognichat
193
+
194
+ # Create environment file
195
+ cp .env.example .env
196
+
197
+ # Add your Groq API key to .env
198
+ echo "GROQ_API_KEY=your_actual_api_key_here" >> .env
199
+
200
+ # Build and run with Docker Compose
201
+ docker-compose up -d
202
+
203
+ # Or build manually
204
+ docker build -t cognichat .
205
+ docker run -p 7860:7860 --env-file .env cognichat
206
+ ```
207
+
208
+ #### 🐍 Method 2: Local Python Environment
209
+
210
+ ```bash
211
+ # Clone the repository
212
+ git clone https://github.com/RautRitesh/Chat-with-docs
213
+ cd cognichat
214
+
215
+ # Create virtual environment
216
+ python -m venv venv
217
+ source venv/bin/activate # On Windows: venv\Scripts\activate
218
+
219
+ # Install dependencies
220
+ pip install -r requirements.txt
221
+
222
+ # Set environment variables
223
+ export GROQ_API_KEY=your_actual_api_key_here
224
+
225
+ # Run the application
226
+ python app.py
227
+ ```
228
+
229
+ #### πŸ€— Method 3: HuggingFace Spaces
230
+
231
+ 1. Fork this repository
232
+ 2. Create a new Space on [HuggingFace](https://huggingface.co/spaces)
233
+ 3. Link your forked repository
234
+ 4. Add `GROQ_API_KEY` in Settings β†’ Repository Secrets
235
+ 5. Space will auto-deploy!
236
+
237
+ ### First Steps
238
+
239
+ 1. Open `http://localhost:7860` in your browser
240
+ 2. Upload a document (PDF, DOCX, TXT, or image)
241
+ 3. Wait for processing (progress indicator will show status)
242
+ 4. Start chatting with your document!
243
+ 5. Use the πŸ”Š button to hear responses via TTS
244
+
245
+ ---
246
+
247
+ ## πŸ“¦ Deployment
248
+
249
+ ### Environment Variables
250
+
251
+ Create a `.env` file with the following variables:
252
+
253
+ ```bash
254
+ # Required
255
+ GROQ_API_KEY=your_groq_api_key_here
256
+
257
+ # Optional
258
+ PORT=7860
259
+ HF_HOME=/tmp/huggingface_cache # For HF Spaces
260
+ FLASK_DEBUG=0 # Set to 1 for development
261
+ MAX_UPLOAD_SIZE=10485760 # 10MB default
262
+ ```
263
+
264
+ ### Docker Deployment
265
+
266
+ ```bash
267
+ # Production build
268
+ docker build -t cognichat:latest .
269
+
270
+ # Run with resource limits
271
+ docker run -d \
272
+ --name cognichat \
273
+ -p 7860:7860 \
274
+ --env-file .env \
275
+ --memory="2g" \
276
+ --cpus="1.5" \
277
+ cognichat:latest
278
+ ```
279
+
280
+ ### Docker Compose
281
+
282
+ ```yaml
283
+ version: '3.8'
284
+
285
+ services:
286
+ cognichat:
287
+ build: .
288
+ ports:
289
+ - "7860:7860"
290
+ environment:
291
+ - GROQ_API_KEY=${GROQ_API_KEY}
292
+ volumes:
293
+ - ./data:/app/data
294
+ restart: unless-stopped
295
+ ```
296
+
297
+ ### HuggingFace Spaces Configuration
298
+
299
+ Add these files to your repository:
300
+
301
+ **app_port** in `README.md` header:
302
+ ```yaml
303
+ app_port: 7860
304
+ ```
305
+
306
+ **Repository Secrets**:
307
+ - `GROQ_API_KEY`: Your Groq API key
308
+
309
+ The application automatically detects HF Spaces environment and adjusts paths accordingly.
310
+
311
+ ---
312
+
313
+ ## βš™οΈ Configuration
314
+
315
+ ### Document Processing Settings
316
+
317
+ ```python
318
+ # In app.py - Customize these settings
319
+ CHUNK_SIZE = 1000 # Characters per chunk
320
+ CHUNK_OVERLAP = 200 # Overlap between chunks
321
+ EMBEDDING_MODEL = "sentence-transformers/all-miniLM-L6-v2"
322
+ RETRIEVER_K = 5 # Number of chunks to retrieve
323
+ ```
324
+
325
+ ### Model Configuration
326
+
327
+ ```python
328
+ # LLM Settings
329
+ LLM_PROVIDER = "groq"
330
+ MODEL_NAME = "llama-3.1-70b-versatile"
331
+ TEMPERATURE = 0.7
332
+ MAX_TOKENS = 2048
333
+ ```
334
+
335
+ ### Search Configuration
336
+
337
+ ```python
338
+ # Hybrid Search Weights
339
+ FAISS_WEIGHT = 0.6 # Semantic search weight
340
+ BM25_WEIGHT = 0.4 # Keyword search weight
341
+ ```
342
+
343
+ ---
344
+
345
+ ## πŸ“š API Reference
346
+
347
+ ### Endpoints
348
+
349
+ #### Upload Document
350
+
351
+ ```http
352
+ POST /upload
353
+ Content-Type: multipart/form-data
354
+
355
+ {
356
+ "file": <binary>
357
+ }
358
+ ```
359
+
360
+ **Response**:
361
+ ```json
362
+ {
363
+ "status": "success",
364
+ "message": "Document processed successfully",
365
+ "filename": "example.pdf",
366
+ "chunks": 45
367
+ }
368
+ ```
369
+
370
+ #### Chat
371
+
372
+ ```http
373
+ POST /chat
374
+ Content-Type: application/json
375
+
376
+ {
377
+ "message": "What is the main topic?",
378
+ "stream": true
379
+ }
380
+ ```
381
+
382
+ **Response** (Streaming):
383
+ ```
384
+ data: {"token": "The", "done": false}
385
+ data: {"token": " main", "done": false}
386
+ data: {"token": " topic", "done": false}
387
+ data: {"done": true}
388
+ ```
389
+
390
+ #### Clear Session
391
+
392
+ ```http
393
+ POST /clear
394
+ ```
395
+
396
+ **Response**:
397
+ ```json
398
+ {
399
+ "status": "success",
400
+ "message": "Session cleared"
401
+ }
402
+ ```
403
+
404
+ ---
405
+
406
+ ## πŸ”§ Troubleshooting
407
+
408
+ ### Common Issues
409
+
410
+ #### 1. Permission Errors in Docker
411
+
412
+ **Problem**: `Permission denied` when writing to cache directories
413
+
414
+ **Solution**:
415
+ ```bash
416
+ # Rebuild with proper permissions
417
+ docker build --no-cache -t cognichat .
418
+
419
+ # Or run with volume permissions
420
+ docker run -v $(pwd)/cache:/tmp/huggingface_cache \
421
+ --user $(id -u):$(id -g) \
422
+ cognichat
423
+ ```
424
+
425
+ #### 2. Model Loading Fails
426
+
427
+ **Problem**: Cannot download HuggingFace models
428
+
429
+ **Solution**:
430
+ ```bash
431
+ # Pre-download models
432
+ python test_embeddings.py
433
+
434
+ # Or use HF_HOME environment variable
435
+ export HF_HOME=/path/to/writable/directory
436
+ ```
437
+
438
+ #### 3. Chat Returns 400 Error
439
+
440
+ **Problem**: Upload directory not writable (common in HF Spaces)
441
+
442
+ **Solution**: Application now automatically uses `/tmp/uploads` in HF Spaces environment. Ensure latest version is deployed.
443
+
444
+ #### 4. API Key Invalid
445
+
446
+ **Problem**: Groq API returns authentication error
447
+
448
+ **Solution**:
449
+ - Verify key at [Groq Console](https://console.groq.com/keys)
450
+ - Check `.env` file has correct format: `GROQ_API_KEY=gsk_...`
451
+ - Restart application after updating key
452
+
453
+ ### Debug Mode
454
+
455
+ Enable detailed logging:
456
+
457
+ ```bash
458
+ export FLASK_DEBUG=1
459
+ export LANGCHAIN_VERBOSE=true
460
+ python app.py
461
+ ```
462
+
463
+ ---
464
+
465
+ ## πŸ§ͺ Testing
466
+
467
+ ```bash
468
+ # Run test suite
469
+ pytest tests/
470
+
471
+ # Test embedding model
472
+ python test_embeddings.py
473
+
474
+ # Test document processing
475
+ pytest tests/test_document_processor.py
476
+
477
+ # Integration tests
478
+ pytest tests/test_integration.py
479
+ ```
480
+
481
+ ---
482
+
483
+ ## 🀝 Contributing
484
+
485
+ We welcome contributions! Please follow these steps:
486
+
487
+ 1. Fork the repository
488
+ 2. Create a feature branch (`git checkout -b feature/amazing-feature`)
489
+ 3. Commit your changes (`git commit -m 'Add amazing feature'`)
490
+ 4. Push to the branch (`git push origin feature/amazing-feature`)
491
+ 5. Open a Pull Request
492
+
493
+ ### Development Guidelines
494
+
495
+ - Follow PEP 8 style guide
496
+ - Add tests for new features
497
+ - Update documentation
498
+ - Ensure Docker build succeeds
499
+
500
+ ---
501
+
502
+ ## πŸ“ Changelog
503
+
504
+ ### Version 2.0 (October 2025)
505
+
506
+ βœ… **Major Improvements**:
507
+ - Fixed Docker permission issues
508
+ - HuggingFace Spaces compatibility
509
+ - Enhanced error handling
510
+ - Multiple model loading fallbacks
511
+ - Improved security (non-root execution)
512
+
513
+ βœ… **Bug Fixes**:
514
+ - Upload directory write permissions
515
+ - Cache directory access
516
+ - Model initialization reliability
517
+
518
+ ### Version 1.0 (Initial Release)
519
+
520
+ - Basic RAG functionality
521
+ - PDF and DOCX support
522
+ - FAISS vector store
523
+ - Conversational memory
524
+
525
+ ---
526
+
527
+ ## πŸ“„ License
528
+
529
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
530
+
531
+ ---
532
+
533
+ ## πŸ™ Acknowledgments
534
+
535
+ - **LangChain** for RAG framework
536
+ - **Groq** for high-speed LLM inference
537
+ - **HuggingFace** for embeddings and hosting
538
+ - **FAISS** for efficient vector search
539
+
540
+ ---
541
+
542
+ ## πŸ“ž Support
543
+
544
+ - **Issues**: [GitHub Issues](https://github.com/yourusername/cognichat/issues)
545
+ - **Discussions**: [GitHub Discussions](https://github.com/yourusername/cognichat/discussions)
546
+ - **Email**: riteshraut123321@gmail.com
547
+
548
+ ---
549
+
550
+ <div align="center">
551
+
552
+ **Made with ❀️ by the CogniChat Team**
553
+
554
+ [⭐ Star us on GitHub](https://github.com/yourusername/cognichat) β€’ [πŸ› Report Bug](https://github.com/yourusername/cognichat/issues) β€’ [✨ Request Feature](https://github.com/yourusername/cognichat/issues)
555
+
556
+ </div>