Spaces:

Zeri00
/

Cogni-chat-document-reader

Sleeping

App Files Files Community

riteshraut committed on Oct 12, 2025

Commit

ba4d135

1 Parent(s): 07bb4d0

Add Dockerfile and final setup for Hugging Face deployment

Browse files

Files changed (6) hide show

Dockerfile +27 -0
app.py +100 -0
packages.txt +2 -0
rag_processor.py +131 -0
requirements.txt +0 -0
templates/index.html +342 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,27 @@

+# Use an official Python runtime as a parent image
+FROM python:3.10-slim
+# Set the working directory in the container
+WORKDIR /code
+# Copy the system dependencies file and install them (still running as root).
+# tesseract-ocr is needed for OCR on images (used by unstructured).
+COPY packages.txt .
+RUN apt-get update && apt-get install -y --no-install-recommends $(cat packages.txt) && \
+    rm -rf /var/lib/apt/lists/*
+# Hugging Face Spaces runs the container as UID 1000. Create that user and give
+# it the working directory; otherwise the app cannot create uploads/ or the
+# Hugging Face model cache at runtime.
+RUN useradd -m -u 1000 user && chown -R user:user /code
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+# Copy the Python requirements file
+COPY --chown=user:user requirements.txt .
+# Install the Python dependencies (as the non-root user, so they land in ~/.local)
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the rest of the application code into the container
+COPY --chown=user:user . .
+# Expose the port that Hugging Face Spaces uses
+EXPOSE 7860
+# Command to run the application using gunicorn (a production-ready server),
+# bound to port 7860, which is the standard for HF Spaces.
+# --workers 1: the RAG chains are cached in process memory, so every request
+#              must reach the same process.
+# --timeout 300: loading the embedding model and indexing a document can take
+#              longer than gunicorn's default 30 s worker timeout.
+CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "1", "--timeout", "300", "app:app"]

app.py ADDED Viewed

	@@ -0,0 +1,100 @@

+import os
+from flask import Flask, request, render_template, session, jsonify, Response, stream_with_context
+from werkzeug.utils import secure_filename
+from rag_processor import create_rag_chain
+import time
+# --- Basic Flask App Setup ---
+app = Flask(__name__)
+# A secret key is needed for session management
+app.config['SECRET_KEY'] = os.urandom(24)
+# Configure the upload folder
+app.config['UPLOAD_FOLDER'] = 'uploads'
+# Ensure the upload folder exists
+os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
+# In-memory storage for RAG chains to avoid re-creating them on every request.
+# In a production scenario, you might want a more persistent cache like Redis.
+rag_chains = {}
+@app.route('/', methods=['GET'])
+def index():
+    """
+    Renders the main page.
+    """
+    return render_template('index.html')
+@app.route('/upload', methods=['POST'])
+def upload_file():
+    """
+    Handles file uploads and processing.
+    """
+    if 'file' not in request.files:
+        return jsonify({'status': 'error', 'message': 'No file part in the request.'}), 400
+    file = request.files['file']
+    if file.filename == '':
+        return jsonify({'status': 'error', 'message': 'No selected file.'}), 400
+    if file:
+        filename = secure_filename(file.filename)
+        filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
+        file.save(filepath)
+        try:
+            # --- RAG Chain Creation ---
+            print(f"Creating RAG chain for {filename}...")
+            # Simulate a delay for demonstration purposes of the loading animation
+            time.sleep(2)
+            rag_chains[filename] = create_rag_chain(filepath)
+            print("RAG chain created successfully.")
+            # Store the filename in the user's session
+            session['filename'] = filename
+            return jsonify({'status': 'success', 'filename': filename})
+        except Exception as e:
+            print(f"Error creating RAG chain: {e}")
+            if os.path.exists(filepath):
+                os.remove(filepath)
+            return jsonify({'status': 'error', 'message': f'Failed to process file: {str(e)}'}), 500
+    return jsonify({'status': 'error', 'message': 'An unexpected error occurred.'}), 500
+@app.route('/chat', methods=['POST'])
+def chat():
+    """
+    Handles chat messages from the user and streams the response.
+    """
+    data = request.get_json()
+    question = data.get('question')
+    filename = session.get('filename')
+    if not question:
+        return jsonify({'status': 'error', 'message': 'Question is missing.'}), 400
+    if not filename or filename not in rag_chains:
+        return jsonify({'status': 'error', 'message': 'File not uploaded or processed yet.'}), 400
+    try:
+        rag_chain = rag_chains[filename]
+        def generate():
+            """A generator function to stream the response."""
+            for chunk in rag_chain.stream(question):
+                yield chunk
+        # Use stream_with_context to ensure the generator has access to the request context
+        return Response(stream_with_context(generate()), mimetype='text/plain')
+    except Exception as e:
+        print(f"Error during chat invocation: {e}")
+        # This error won't be sent as a stream, handle appropriately
+        return Response("An error occurred while getting the answer.", status=500, mimetype='text/plain')
+# Local development entry point only: runs Flask's built-in server in debug
+# mode on port 5001. In the container the app is served by gunicorn instead
+# (see the Dockerfile CMD), which imports `app` and never executes this branch.
+if __name__ == '__main__':
+    app.run(debug=True, port=5001)

packages.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ libfaiss-dev
2	+ tesseract-ocr

rag_processor.py ADDED Viewed

	@@ -0,0 +1,131 @@

+import os
+from dotenv import load_dotenv
+# Document Loaders
+from langchain_community.document_loaders import (
+    TextLoader,
+    PyPDFLoader,
+    Docx2txtLoader,
+    UnstructuredImageLoader,
+)
+# Text Splitter
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+# Embeddings
+from langchain_huggingface import HuggingFaceEmbeddings
+# Vector Stores
+from langchain_community.vectorstores import FAISS
+# LLM
+from langchain_groq import ChatGroq
+# Prompting
+from langchain.prompts import PromptTemplate
+# Chains
+from langchain_core.runnables import RunnableParallel, RunnablePassthrough
+from langchain_core.output_parsers import StrOutputParser
+# Maps a lowercase file extension (including the leading dot) to the LangChain
+# document loader class used to read that kind of file. create_rag_chain()
+# raises ValueError for any extension that is not a key here.
+LOADER_MAPPING = {
+    ".txt": TextLoader,
+    ".pdf": PyPDFLoader,
+    ".docx": Docx2txtLoader,
+    ".jpeg": UnstructuredImageLoader,
+    ".jpg": UnstructuredImageLoader,
+    ".png": UnstructuredImageLoader,
+}
+def create_rag_chain(filepath):
+    """
+    Creates a Retrieval-Augmented Generation (RAG) chain from a given file path.
+    Args:
+        filepath (str): The path to the document file.
+    Returns:
+        A LangChain runnable object representing the RAG chain.
+    Raises:
+        ValueError: If the file extension is not supported.
+    """
+    # Load environment variables from .env file
+    load_dotenv()
+    api_key = os.getenv("GROQ_API_KEY")
+    if not api_key:
+        raise ValueError("GROQ_API_KEY not found in environment variables.")
+    # --- 1. Load Document ---
+    print("Loading document...")
+    file_extension = "." + filepath.rsplit(".", 1)[-1].lower()
+    if file_extension in LOADER_MAPPING:
+        loader_class = LOADER_MAPPING[file_extension]
+        # For image loaders, mode="single" can be useful to treat the image as one document
+        if file_extension in [".jpeg", ".jpg", ".png"]:
+             loader = loader_class(filepath, mode="single")
+        else:
+             loader = loader_class(filepath)
+        docs = loader.load()
+    else:
+        raise ValueError(f"Unsupported file type: '{file_extension}'")
+    print(f"Document loaded successfully. Number of pages/docs: {len(docs)}")
+    # --- 2. Split Text ---
+    print("\nSplitting document into chunks...")
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+    splits = text_splitter.split_documents(docs)
+    print(f"{len(splits)} chunks created.")
+    # --- 3. Create Embeddings ---
+    print("\nInitializing Hugging Face embeddings model...")
+    model_name = "BAAI/bge-base-en-v1.5"
+    model_kwargs = {'device': 'cpu'}
+    encode_kwargs = {'normalize_embeddings': True}
+    embeddings = HuggingFaceEmbeddings(
+        model_name=model_name,
+        model_kwargs=model_kwargs,
+        encode_kwargs=encode_kwargs
+    )
+    print("Embeddings model loaded.")
+    # --- 4. Create Vector Store ---
+    print("\nCreating FAISS vector store from document chunks...")
+    vectorstore = FAISS.from_documents(documents=splits, embedding=embeddings)
+    print("Vector store created successfully.")
+    # --- 5. Create Retriever ---
+    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
+    # --- 6. Define the Prompt Template ---
+    template = """
+You are an expert program. Your job is to provide accurate and helpful answers based ONLY on the provided context.
+If the information is not in the context, say that you don't know the answer.
+Keep your more ellaborated and  explain in a clear way.
+Context: {context}
+Question: {question}
+Answer:
+"""
+    prompt = PromptTemplate.from_template(template)
+    # --- 7. Initialize the LLM ---
+    llm = ChatGroq(model_name="llama-3.1-8b-instant", api_key=api_key, temperature=0)
+    # --- 8. Create the RAG Chain ---
+    rag_chain = (
+        RunnableParallel(
+            context=retriever,
+            question=RunnablePassthrough()
+        )
+        | prompt
+        | llm
+        | StrOutputParser()
+    )
+    return rag_chain

requirements.txt ADDED Viewed

Binary file (1.21 kB). View file

templates/index.html ADDED Viewed

	@@ -0,0 +1,342 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>CogniChat - Chat with your Documents</title>
+    <script src="https://cdn.tailwindcss.com"></script>
+    <link rel="preconnect" href="https://fonts.googleapis.com">
+    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
+    <link href="https://fonts.googleapis.com/css2?family=Google+Sans:wght@400;500;700&family=Roboto:wght@400;500&display=swap" rel="stylesheet">
+    <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
+    <style>
+        :root {
+            --background: #f0f4f9;
+            --foreground: #1f1f1f;
+            --primary: #1a73e8;
+            --primary-hover: #1867cf;
+            --card: #ffffff;
+            --card-border: #dadce0;
+            --input-bg: #e8f0fe;
+            --user-bubble: #d9e7ff;
+            --bot-bubble: #f1f3f4;
+        }
+        /* Dark mode styles */
+        .dark {
+            --background: #202124;
+            --foreground: #e8eaed;
+            --primary: #8ab4f8;
+            --primary-hover: #99bdfa;
+            --card: #303134;
+            --card-border: #5f6368;
+            --input-bg: #303134;
+            --user-bubble: #3c4043;
+            --bot-bubble: #3c4043;
+        }
+        body {
+            font-family: 'Google Sans', 'Roboto', sans-serif;
+            background-color: var(--background);
+            color: var(--foreground);
+            overflow: hidden;
+        }
+        #chat-window::-webkit-scrollbar { width: 8px; }
+        #chat-window::-webkit-scrollbar-track { background: transparent; }
+        #chat-window::-webkit-scrollbar-thumb { background-color: #bdc1c6; border-radius: 20px; }
+        .dark #chat-window::-webkit-scrollbar-thumb { background-color: #5f6368; }
+        .drop-zone--over {
+            border-color: var(--primary);
+            box-shadow: 0 0 15px rgba(26, 115, 232, 0.3);
+        }
+        /* Loading Spinner */
+        .loader {
+            width: 48px;
+            height: 48px;
+            border: 3px solid var(--card-border);
+            border-radius: 50%;
+            display: inline-block;
+            position: relative;
+            box-sizing: border-box;
+            animation: rotation 1s linear infinite;
+        }
+        .loader::after {
+            content: '';
+            box-sizing: border-box;
+            position: absolute;
+            left: 50%;
+            top: 50%;
+            transform: translate(-50%, -50%);
+            width: 56px;
+            height: 56px;
+            border-radius: 50%;
+            border: 3px solid;
+            border-color: var(--primary) transparent;
+        }
+        @keyframes rotation {
+            0% { transform: rotate(0deg); }
+            100% { transform: rotate(360deg); }
+        }
+        /* Markdown Styling */
+        .markdown-content p { margin-bottom: 0.75rem; line-height: 1.75; }
+        .markdown-content ul, .markdown-content ol { margin-left: 1.5rem; margin-bottom: 0.75rem; }
+        .markdown-content code { background-color: rgba(0,0,0,0.05); padding: 0.2rem 0.4rem; border-radius: 0.25rem; font-family: 'Roboto Mono', monospace; font-size: 0.9em; }
+        .dark .markdown-content code { background-color: rgba(255,255,255,0.1); }
+        .markdown-content pre { position: relative; background-color: #f8f9fa; border: 1px solid var(--card-border); border-radius: 0.5rem; margin-bottom: 1rem; }
+        .dark .markdown-content pre { background-color: #2e2f32; }
+        .markdown-content pre code { background: none; padding: 1rem; display: block; overflow-x: auto; }
+        .markdown-content pre .copy-code-btn { position: absolute; top: 0.5rem; right: 0.5rem; background-color: #e8eaed; border: 1px solid #dadce0; color: #5f6368; padding: 0.3rem 0.6rem; border-radius: 0.25rem; cursor: pointer; opacity: 0; transition: opacity 0.2s; font-size: 0.8em;}
+        .dark .markdown-content pre .copy-code-btn { background-color: #3c4043; border-color: #5f6368; color: #e8eaed; }
+        .markdown-content pre:hover .copy-code-btn { opacity: 1; }
+    </style>
+</head>
+<body class="w-screen h-screen dark"> <!-- Default to dark mode -->
+    <main id="main-content" class="h-full flex flex-col transition-opacity duration-500">
+        <!-- Chat Area -->
+        <div id="chat-container" class="hidden flex-1 flex flex-col w-full mx-auto overflow-hidden">
+            <header class="text-center p-4 border-b border-[var(--card-border)] flex-shrink-0">
+                <h1 class="text-xl font-medium">Chat with your Docs</h1>
+                <p id="chat-filename" class="text-xs text-gray-500 dark:text-gray-400 mt-1"></p>
+            </header>
+            <div id="chat-window" class="flex-1 overflow-y-auto p-4 md:p-6 lg:p-10">
+                <div id="chat-content" class="max-w-4xl mx-auto space-y-8">
+                    <!-- Chat messages will be appended here -->
+                </div>
+            </div>
+            <div class="p-4 flex-shrink-0 bg-[var(--background)] border-t border-[var(--card-border)]">
+                <!-- Question input and send button; the submit handler in the script below posts to /chat -->
+                <form id="chat-form" class="max-w-4xl mx-auto bg-[var(--card)] rounded-full p-2 flex items-center shadow-sm border border-transparent focus-within:border-[var(--primary)] transition-colors">
+                    <input type="text" id="chat-input" placeholder="Ask a question about your document..." class="flex-grow bg-transparent focus:outline-none px-4 text-sm" autocomplete="off">
+                    <button type="submit" id="chat-submit-btn" class="bg-[var(--primary)] hover:bg-[var(--primary-hover)] text-white p-2 rounded-full transition-all duration-200 disabled:opacity-50 disabled:cursor-not-allowed disabled:bg-gray-500" title="Send" aria-label="Send">
+                        <svg class="w-5 h-5" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M3.49941 11.5556L11.555 3.5L12.4438 4.38889L6.27721 10.5556H21.9994V11.5556H6.27721L12.4438 17.7222L11.555 18.6111L3.49941 10.5556V11.5556Z" transform="rotate(180, 12.7497, 11.0556)" fill="currentColor"></path></svg>
+                    </button>
+                </form>
+            </div>
+        </div>
+        <!-- Upload Area -->
+        <div id="upload-container" class="flex-1 flex flex-col items-center justify-center p-8 transition-opacity duration-300">
+            <div class="text-center">
+                <h1 class="text-5xl font-medium mb-4">Upload docs to chat</h1>
+                <div id="drop-zone" class="w-full max-w-lg text-center border-2 border-dashed border-[var(--card-border)] rounded-2xl p-10 transition-all duration-300 cursor-pointer bg-[var(--card)] hover:border-[var(--primary)]">
+                    <input id="file-upload" type="file" class="hidden" accept=".pdf,.txt,.docx,.jpg,.jpeg,.png" placeholder="upload your documents here">
+                    <svg class="mx-auto h-12 w-12 text-gray-400" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" ><path stroke-linecap="round" stroke-linejoin="round" d="M12 16.5V9.75m0 0l3-3m-3 3l-3 3M6.75 19.5a4.5 4.5 0 01-1.41-8.775 5.25 5.25 0 0110.233-2.33 3 3 0 013.758 3.848A3.752 3.752 0 0118 19.5H6.75z"></path></svg>
+                    <p class="mt-4 text-sm font-medium">Drag & drop a file or click to upload</p>
+                    <p id="file-name" class="mt-2 text-xs text-gray-500"></p>
+                </div>
+            </div>
+        </div>
+        <!-- Loading Overlay -->
+        <div id="loading-overlay" class="hidden fixed inset-0 bg-[var(--background)] bg-opacity-80 backdrop-blur-sm flex flex-col items-center justify-center z-50">
+            <div class="loader"></div>
+            <p id="loading-text" class="mt-6 text-sm">Processing...</p>
+        </div>
+    </main>
+    <script>
+        document.addEventListener('DOMContentLoaded', () => {
+            const uploadContainer = document.getElementById('upload-container');
+            const chatContainer = document.getElementById('chat-container');
+            const dropZone = document.getElementById('drop-zone');
+            const fileUploadInput = document.getElementById('file-upload');
+            const fileNameSpan = document.getElementById('file-name');
+            const loadingOverlay = document.getElementById('loading-overlay');
+            const loadingText = document.getElementById('loading-text');
+            const chatForm = document.getElementById('chat-form');
+            const chatInput = document.getElementById('chat-input');
+            const chatSubmitBtn = document.getElementById('chat-submit-btn');
+            const chatWindow = document.getElementById('chat-window');
+            const chatContent = document.getElementById('chat-content');
+            const chatFilename = document.getElementById('chat-filename');
+            let selectedFile = null;
+            // --- File Upload Logic ---
+            dropZone.addEventListener('click', () => fileUploadInput.click());
+            ['dragenter', 'dragover', 'dragleave', 'drop'].forEach(eventName => {
+                dropZone.addEventListener(eventName, preventDefaults, false);
+                document.body.addEventListener(eventName, preventDefaults, false);
+            });
+            ['dragenter', 'dragover'].forEach(eventName => dropZone.classList.add('drop-zone--over'));
+            ['dragleave', 'drop'].forEach(eventName => dropZone.classList.remove('drop-zone--over'));
+            dropZone.addEventListener('drop', (e) => {
+                const files = e.dataTransfer.files;
+                if (files.length > 0) handleFile(files[0]);
+            });
+            fileUploadInput.addEventListener('change', (e) => {
+                if (e.target.files.length > 0) handleFile(e.target.files[0]);
+            });
+            function preventDefaults(e) { e.preventDefault(); e.stopPropagation(); }
+            async function handleFile(file) {
+                selectedFile = file;
+                fileNameSpan.textContent = `Selected: ${file.name}`;
+                await uploadAndProcessFile();
+            }
+            async function uploadAndProcessFile() {
+                if (!selectedFile) return;
+                const formData = new FormData();
+                formData.append('file', selectedFile);
+                loadingOverlay.classList.remove('hidden');
+                const loadingSteps = [
+                    `Uploading ${selectedFile.name}...`,
+                    "Parsing document...",
+                    "Extracting text...",
+                    "Creating embeddings (this may take a moment)...",
+                    "Building knowledge base..."
+                ];
+                let stepIndex = 0;
+                loadingText.textContent = loadingSteps[stepIndex];
+                const stepInterval = setInterval(() => {
+                    stepIndex++;
+                    if (stepIndex < loadingSteps.length) {
+                        loadingText.textContent = loadingSteps[stepIndex];
+                    } else {
+                        loadingText.textContent = "Finalizing...";
+                    }
+                }, 1500);
+                try {
+                    const response = await fetch('/upload', { method: 'POST', body: formData });
+                    const result = await response.json();
+                    if (!response.ok) throw new Error(result.message || 'Unknown error occurred.');
+                    chatFilename.textContent = `Chatting with ${result.filename}`;
+                    uploadContainer.classList.add('hidden');
+                    chatContainer.classList.remove('hidden');
+                    appendMessage("I've analyzed your document. What would you like to know?", "bot");
+                } catch (error) {
+                    console.error('Upload error:', error);
+                    alert(`Error: ${error.message}`);
+                } finally {
+                    clearInterval(stepInterval);
+                    loadingOverlay.classList.add('hidden');
+                    fileNameSpan.textContent = '';
+                    selectedFile = null;
+                }
+            }
+            // --- Chat Logic ---
+            chatForm.addEventListener('submit', async (e) => {
+                e.preventDefault();
+                const question = chatInput.value.trim();
+                if (!question) return;
+                appendMessage(question, 'user');
+                chatInput.value = '';
+                chatInput.disabled = true;
+                chatSubmitBtn.disabled = true;
+                const botMessageContainer = appendMessage('', 'bot');
+                const contentDiv = botMessageContainer.querySelector('.markdown-content');
+                try {
+                    const response = await fetch('/chat', {
+                        method: 'POST',
+                        headers: { 'Content-Type': 'application/json' },
+                        body: JSON.stringify({ question: question }),
+                    });
+                    if (!response.ok) throw new Error(`Server error: ${response.statusText}`);
+                    const reader = response.body.getReader();
+                    const decoder = new TextDecoder();
+                    let fullResponse = '';
+                    while (true) {
+                        const { value, done } = await reader.read();
+                        if (done) break;
+                        fullResponse += decoder.decode(value, { stream: true });
+                        contentDiv.innerHTML = marked.parse(fullResponse);
+                        scrollToBottom();
+                    }
+                    contentDiv.querySelectorAll('pre').forEach(addCopyButton);
+                } catch (error) {
+                    console.error('Chat error:', error);
+                    contentDiv.innerHTML = `<p class="text-red-500">Error: ${error.message}</p>`;
+                } finally {
+                    chatInput.disabled = false;
+                    chatSubmitBtn.disabled = false;
+                    chatInput.focus();
+                }
+            });
+            // --- UI Helper Functions ---
+            function appendMessage(text, sender) {
+                const messageWrapper = document.createElement('div');
+                messageWrapper.className = `flex items-start gap-4`;
+                const iconSVG = sender === 'user'
+                    ? `<div class="bg-blue-100 dark:bg-gray-700 p-2.5 rounded-full flex-shrink-0 mt-1"><svg class="w-5 h-5 text-blue-600 dark:text-blue-300" viewBox="0 0 24 24"><path fill="currentColor" d="M12 12c2.21 0 4-1.79 4-4s-1.79-4-4-4-4 1.79-4 4 1.79 4 4 4zm0 2c-2.67 0-8 1.34-8 4v2h16v-2c0-2.66-5.33-4-8-4z"></path></svg></div>`
+                    : `<div class="bg-gray-200 dark:bg-gray-700 rounded-full flex-shrink-0 mt-1 text-xl flex items-center justify-center w-10 h-10">✨</div>`;
+                const messageBubble = document.createElement('div');
+                messageBubble.className = `flex-1 pt-1`;
+                const senderName = document.createElement('p');
+                senderName.className = 'font-medium text-sm mb-1';
+                senderName.textContent = sender === 'user' ? 'You' : 'CogniChat';
+                const contentDiv = document.createElement('div');
+                contentDiv.className = 'text-base markdown-content';
+                contentDiv.innerHTML = marked.parse(text);
+                messageBubble.appendChild(senderName);
+                messageBubble.appendChild(contentDiv);
+                messageWrapper.innerHTML = iconSVG;
+                messageWrapper.appendChild(messageBubble);
+                chatContent.appendChild(messageWrapper);
+                scrollToBottom();
+                return messageBubble;
+            }
+            function scrollToBottom() {
+                chatWindow.scrollTo({
+                    top: chatWindow.scrollHeight,
+                    behavior: 'smooth'
+                });
+            }
+            function addCopyButton(pre) {
+                const button = document.createElement('button');
+                button.className = 'copy-code-btn';
+                button.textContent = 'Copy';
+                pre.appendChild(button);
+                button.addEventListener('click', () => {
+                    const code = pre.querySelector('code').innerText;
+                    navigator.clipboard.writeText(code).then(() => {
+                        button.textContent = 'Copied!';
+                        setTimeout(() => button.textContent = 'Copy', 2000);
+                    });
+                });
+            }
+        });
+    </script>
+</body>
+</html>