Spaces:
Sleeping
Sleeping
initial commit
Browse files- .gitignore +225 -0
- Dockerfile +16 -0
- docHandler.py +141 -0
- frontend/css/styles.css +466 -0
- frontend/index.html +88 -0
- frontend/js/main.js +493 -0
- main.py +269 -0
- pdfHandler.py +141 -0
- requirements.txt +42 -0
- task_manager.py +145 -0
- txtHandler.py +141 -0
- webHandler.py +110 -0
.gitignore
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Node.js
|
| 2 |
+
node_modules/
|
| 3 |
+
npm-debug.log*
|
| 4 |
+
yarn-debug.log*
|
| 5 |
+
yarn-error.log*
|
| 6 |
+
.npm
|
| 7 |
+
.env.development.local
|
| 8 |
+
.env.test.local
|
| 9 |
+
.env.production.local
|
| 10 |
+
.env.local
|
| 11 |
+
|
| 12 |
+
# Byte-compiled / optimized / DLL files
|
| 13 |
+
__pycache__/
|
| 14 |
+
*.py[codz]
|
| 15 |
+
*$py.class
|
| 16 |
+
|
| 17 |
+
# C extensions
|
| 18 |
+
*.so
|
| 19 |
+
|
| 20 |
+
# Distribution / packaging
|
| 21 |
+
.Python
|
| 22 |
+
build/
|
| 23 |
+
develop-eggs/
|
| 24 |
+
dist/
|
| 25 |
+
downloads/
|
| 26 |
+
eggs/
|
| 27 |
+
.eggs/
|
| 28 |
+
lib/
|
| 29 |
+
lib64/
|
| 30 |
+
parts/
|
| 31 |
+
sdist/
|
| 32 |
+
var/
|
| 33 |
+
wheels/
|
| 34 |
+
share/python-wheels/
|
| 35 |
+
*.egg-info/
|
| 36 |
+
.installed.cfg
|
| 37 |
+
*.egg
|
| 38 |
+
MANIFEST
|
| 39 |
+
|
| 40 |
+
# PyInstaller
|
| 41 |
+
# Usually these files are written by a python script from a template
|
| 42 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 43 |
+
*.manifest
|
| 44 |
+
*.spec
|
| 45 |
+
|
| 46 |
+
# Installer logs
|
| 47 |
+
pip-log.txt
|
| 48 |
+
pip-delete-this-directory.txt
|
| 49 |
+
|
| 50 |
+
# Unit test / coverage reports
|
| 51 |
+
htmlcov/
|
| 52 |
+
.tox/
|
| 53 |
+
.nox/
|
| 54 |
+
.coverage
|
| 55 |
+
.coverage.*
|
| 56 |
+
.cache
|
| 57 |
+
nosetests.xml
|
| 58 |
+
coverage.xml
|
| 59 |
+
*.cover
|
| 60 |
+
*.py.cover
|
| 61 |
+
.hypothesis/
|
| 62 |
+
.pytest_cache/
|
| 63 |
+
cover/
|
| 64 |
+
|
| 65 |
+
# Translations
|
| 66 |
+
*.mo
|
| 67 |
+
*.pot
|
| 68 |
+
|
| 69 |
+
# Django stuff:
|
| 70 |
+
*.log
|
| 71 |
+
local_settings.py
|
| 72 |
+
db.sqlite3
|
| 73 |
+
db.sqlite3-journal
|
| 74 |
+
|
| 75 |
+
# Flask stuff:
|
| 76 |
+
instance/
|
| 77 |
+
.webassets-cache
|
| 78 |
+
|
| 79 |
+
# Scrapy stuff:
|
| 80 |
+
.scrapy
|
| 81 |
+
|
| 82 |
+
# Sphinx documentation
|
| 83 |
+
docs/_build/
|
| 84 |
+
|
| 85 |
+
# PyBuilder
|
| 86 |
+
.pybuilder/
|
| 87 |
+
target/
|
| 88 |
+
|
| 89 |
+
# Jupyter Notebook
|
| 90 |
+
.ipynb_checkpoints
|
| 91 |
+
|
| 92 |
+
# IPython
|
| 93 |
+
profile_default/
|
| 94 |
+
ipython_config.py
|
| 95 |
+
|
| 96 |
+
# pyenv
|
| 97 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 98 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 99 |
+
# .python-version
|
| 100 |
+
|
| 101 |
+
# pipenv
|
| 102 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 103 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 104 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 105 |
+
# install all needed dependencies.
|
| 106 |
+
#Pipfile.lock
|
| 107 |
+
|
| 108 |
+
# UV
|
| 109 |
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
| 110 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 111 |
+
# commonly ignored for libraries.
|
| 112 |
+
#uv.lock
|
| 113 |
+
|
| 114 |
+
# poetry
|
| 115 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 116 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 117 |
+
# commonly ignored for libraries.
|
| 118 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 119 |
+
#poetry.lock
|
| 120 |
+
#poetry.toml
|
| 121 |
+
|
| 122 |
+
# pdm
|
| 123 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 124 |
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
| 125 |
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
| 126 |
+
#pdm.lock
|
| 127 |
+
#pdm.toml
|
| 128 |
+
.pdm-python
|
| 129 |
+
.pdm-build/
|
| 130 |
+
|
| 131 |
+
# pixi
|
| 132 |
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
| 133 |
+
#pixi.lock
|
| 134 |
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
| 135 |
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
| 136 |
+
.pixi
|
| 137 |
+
|
| 138 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 139 |
+
__pypackages__/
|
| 140 |
+
|
| 141 |
+
# Celery stuff
|
| 142 |
+
celerybeat-schedule
|
| 143 |
+
celerybeat.pid
|
| 144 |
+
|
| 145 |
+
# SageMath parsed files
|
| 146 |
+
*.sage.py
|
| 147 |
+
|
| 148 |
+
# Environments
|
| 149 |
+
.env
|
| 150 |
+
.envrc
|
| 151 |
+
.venv
|
| 152 |
+
env/
|
| 153 |
+
venv/
|
| 154 |
+
ENV/
|
| 155 |
+
env.bak/
|
| 156 |
+
venv.bak/
|
| 157 |
+
|
| 158 |
+
# Spyder project settings
|
| 159 |
+
.spyderproject
|
| 160 |
+
.spyproject
|
| 161 |
+
|
| 162 |
+
# Rope project settings
|
| 163 |
+
.ropeproject
|
| 164 |
+
|
| 165 |
+
# mkdocs documentation
|
| 166 |
+
/site
|
| 167 |
+
|
| 168 |
+
# mypy
|
| 169 |
+
.mypy_cache/
|
| 170 |
+
.dmypy.json
|
| 171 |
+
dmypy.json
|
| 172 |
+
|
| 173 |
+
# Pyre type checker
|
| 174 |
+
.pyre/
|
| 175 |
+
|
| 176 |
+
# pytype static type analyzer
|
| 177 |
+
.pytype/
|
| 178 |
+
|
| 179 |
+
# Cython debug symbols
|
| 180 |
+
cython_debug/
|
| 181 |
+
|
| 182 |
+
# PyCharm
|
| 183 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 184 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 185 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 186 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 187 |
+
#.idea/
|
| 188 |
+
|
| 189 |
+
# Abstra
|
| 190 |
+
# Abstra is an AI-powered process automation framework.
|
| 191 |
+
# Ignore directories containing user credentials, local state, and settings.
|
| 192 |
+
# Learn more at https://abstra.io/docs
|
| 193 |
+
.abstra/
|
| 194 |
+
|
| 195 |
+
# Visual Studio Code
|
| 196 |
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
| 197 |
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
| 198 |
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
| 199 |
+
# you could uncomment the following to ignore the entire vscode folder
|
| 200 |
+
# .vscode/
|
| 201 |
+
|
| 202 |
+
# Ruff stuff:
|
| 203 |
+
.ruff_cache/
|
| 204 |
+
|
| 205 |
+
# PyPI configuration file
|
| 206 |
+
.pypirc
|
| 207 |
+
|
| 208 |
+
# Cursor
|
| 209 |
+
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
|
| 210 |
+
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
|
| 211 |
+
# refer to https://docs.cursor.com/context/ignore-files
|
| 212 |
+
.cursorignore
|
| 213 |
+
.cursorindexingignore
|
| 214 |
+
|
| 215 |
+
# Marimo
|
| 216 |
+
marimo/_static/
|
| 217 |
+
marimo/_lsp/
|
| 218 |
+
__marimo__/
|
| 219 |
+
|
| 220 |
+
# ChatWithDoc specific
|
| 221 |
+
chatWithDocEnv/
|
| 222 |
+
uploaded_files/
|
| 223 |
+
.vscode/
|
| 224 |
+
.env
|
| 225 |
+
.env.local
|
Dockerfile
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM python:3.9

# Run as a non-root user (uid 1000), as recommended by the Spaces Docker guide.
RUN useradd -m -u 1000 user
USER user
# Make user-level pip installs (~/.local/bin) resolvable on PATH.
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Copy requirements first so the dependency layer is cached and only
# rebuilt when requirements.txt changes, not on every source edit.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Copy the application source last (invalidated most often).
COPY --chown=user . /app
# Serve the FastAPI app; port 7860 is presumably the Spaces-exposed port — confirm against the Space config.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
docHandler.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_community.document_loaders import Docx2txtLoader
|
| 2 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 3 |
+
from langchain.chat_models import init_chat_model
|
| 4 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 5 |
+
import faiss
|
| 6 |
+
from langchain_community.docstore.in_memory import InMemoryDocstore
|
| 7 |
+
from langchain_community.vectorstores import FAISS
|
| 8 |
+
import os
|
| 9 |
+
from langchain import hub
|
| 10 |
+
from dotenv import load_dotenv
|
| 11 |
+
from langgraph.graph import START, StateGraph
|
| 12 |
+
from typing import List, Dict, Any, Optional
|
| 13 |
+
from pydantic import BaseModel, Field
|
| 14 |
+
from langchain.docstore.document import Document
|
| 15 |
+
|
| 16 |
+
load_dotenv()
|
| 17 |
+
|
| 18 |
+
class State(BaseModel):
    """Shared state passed between the retrieve and generate steps of the RAG graph."""

    # The user's question; required (no default).
    question: str = Field(..., description="Type your question here")
    # Documents returned by the similarity search; starts empty.
    context: List[Document] = Field(
        default_factory=list,
        description="A list of Document objects",
    )
    # Filled in by the generation step with the LLM's reply.
    answer: str = Field(default="", description="Answer will be here")
|
| 25 |
+
|
| 26 |
+
class DocProcessor:
    """Load a DOCX file into a FAISS vector store and answer questions about it.

    Typical usage::

        proc = DocProcessor()
        proc.process_docx("report.docx")
        result = proc.query_response("What is the summary?")

    Raises:
        ValueError: on construction, if ``GOOGLE_API_KEY`` is not set.
    """

    def __init__(self):
        # Fail fast: the Gemini chat model below cannot work without the key.
        if not os.environ.get("GOOGLE_API_KEY"):
            raise ValueError("Google Gemini API key not found in environment variables")

        self.llm = init_chat_model("gemini-2.5-flash", model_provider="google_genai")
        self.embedding_model = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"},
        )
        # Standard RAG prompt from the LangChain hub.
        self.prompt = hub.pull("rlm/rag-prompt")
        self.vector_store = None
        self.chunk_size = 1000
        self.chunk_overlap = 200
        # Compiled retrieve->generate graph, built lazily on first query and
        # reused afterwards (the retrieve step reads self.vector_store at call
        # time, so re-processing a document does not invalidate the graph).
        self._graph = None

    def process_docx(self, file_path: str) -> Dict[str, Any]:
        """
        Process a DOCX file and prepare it for querying.

        Args:
            file_path (str): Path to the DOCX file

        Returns:
            Dict[str, Any]: ``{"status": "success", "message", "num_pages",
            "num_chunks"}`` on success, or ``{"status": "error", "message"}``
            on failure.
        """
        try:
            # Document Loading
            loader = Docx2txtLoader(file_path)
            pages = loader.load()

            # Text Splitting
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=self.chunk_size, chunk_overlap=self.chunk_overlap
            )
            texts = text_splitter.split_documents(pages)

            # Vector Store Setup: probe the embedding model once to size the index.
            embedding_dim = len(self.embedding_model.embed_query("test"))
            index = faiss.IndexFlatL2(embedding_dim)

            # A fresh store replaces any previously processed document.
            self.vector_store = FAISS(
                embedding_function=self.embedding_model,
                index=index,
                docstore=InMemoryDocstore(),
                index_to_docstore_id={},
            )

            # Index chunks
            self.vector_store.add_documents(documents=texts)

            return {
                "status": "success",
                "message": "DOCX processed successfully",
                "num_pages": len(pages),
                "num_chunks": len(texts),
            }
        except Exception as e:
            return {
                "status": "error",
                "message": f"Error processing DOCX: {e}"
            }

    def _build_graph(self):
        """Build and compile the two-step retrieve -> generate state graph."""
        graph_builder = StateGraph(State)

        def retrieve(state: State):
            # Read self.vector_store at call time so the graph always
            # queries the most recently processed document.
            retrieved_docs = self.vector_store.similarity_search(state.question)
            return {"context": retrieved_docs}

        def generate(state: State):
            docs_content = "\n\n".join(doc.page_content for doc in state.context)
            messages = self.prompt.invoke({
                "question": state.question,
                "context": docs_content,
            })
            response = self.llm.invoke(messages)
            return {"answer": response.content}

        graph_builder.add_sequence([retrieve, generate])
        # Use the imported START constant (was previously unused) to mark
        # the entry point, per the langgraph convention.
        graph_builder.add_edge(START, "retrieve")
        return graph_builder.compile()

    def query_response(self, query: str) -> Dict[str, Any]:
        """
        Query the processed document.

        Args:
            query (str): The question to ask about the document

        Returns:
            Dict[str, Any]: ``{"status": "success", "answer", "query"}`` on
            success, or ``{"status": "error", "message"}`` if no document has
            been processed or the query fails.
        """
        if not self.vector_store:
            return {
                "status": "error",
                "message": "No document has been processed yet"
            }

        try:
            # Build the graph once and cache it instead of recompiling per query.
            if self._graph is None:
                self._graph = self._build_graph()

            # Execute the query
            response = self._graph.invoke({
                "question": query
            })

            return {
                "status": "success",
                "answer": response["answer"],
                "query": query,
            }
        except Exception as e:
            return {
                "status": "error",
                "message": f"Error querying document: {e}"
            }
frontend/css/styles.css
ADDED
|
@@ -0,0 +1,466 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
:root {
|
| 2 |
+
--primary: #4361ee;
|
| 3 |
+
--primary-light: #4895ef;
|
| 4 |
+
--secondary: #3f37c9;
|
| 5 |
+
--accent: #4cc9f0;
|
| 6 |
+
--light: #f8f9fa;
|
| 7 |
+
--dark: #212529;
|
| 8 |
+
--success: #4ade80;
|
| 9 |
+
--warning: #facc15;
|
| 10 |
+
--danger: #f87171;
|
| 11 |
+
--gray: #6c757d;
|
| 12 |
+
--light-gray: #e9ecef;
|
| 13 |
+
--border-radius: 12px;
|
| 14 |
+
--shadow: 0 4px 20px rgba(0, 0, 0, 0.08);
|
| 15 |
+
--transition: all 0.3s ease;
|
| 16 |
+
}
|
| 17 |
+
|
| 18 |
+
* {
|
| 19 |
+
margin: 0;
|
| 20 |
+
padding: 0;
|
| 21 |
+
box-sizing: border-box;
|
| 22 |
+
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
body {
|
| 26 |
+
background: linear-gradient(135deg, #f5f7fa 0%, #e4edf5 100%);
|
| 27 |
+
color: var(--dark);
|
| 28 |
+
min-height: 100vh;
|
| 29 |
+
padding: 20px;
|
| 30 |
+
display: flex;
|
| 31 |
+
flex-direction: column;
|
| 32 |
+
align-items: center;
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
.container {
|
| 36 |
+
width: 100%;
|
| 37 |
+
max-width: 1200px;
|
| 38 |
+
margin: 0 auto;
|
| 39 |
+
}
|
| 40 |
+
|
| 41 |
+
header {
|
| 42 |
+
text-align: center;
|
| 43 |
+
padding: 30px 0;
|
| 44 |
+
animation: fadeIn 0.8s ease-out;
|
| 45 |
+
}
|
| 46 |
+
|
| 47 |
+
header h1 {
|
| 48 |
+
font-size: 2.8rem;
|
| 49 |
+
margin-bottom: 10px;
|
| 50 |
+
color: var(--secondary);
|
| 51 |
+
background: linear-gradient(90deg, var(--primary), var(--accent));
|
| 52 |
+
-webkit-background-clip: text;
|
| 53 |
+
background-clip: text;
|
| 54 |
+
color: transparent;
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
header p {
|
| 58 |
+
font-size: 1.2rem;
|
| 59 |
+
color: var(--gray);
|
| 60 |
+
max-width: 600px;
|
| 61 |
+
margin: 0 auto;
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
.app-container {
|
| 65 |
+
display: flex;
|
| 66 |
+
gap: 30px;
|
| 67 |
+
margin-top: 20px;
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
@media (max-width: 900px) {
|
| 71 |
+
.app-container {
|
| 72 |
+
flex-direction: column;
|
| 73 |
+
}
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
+
.input-section {
|
| 77 |
+
flex: 1;
|
| 78 |
+
background: white;
|
| 79 |
+
border-radius: var(--border-radius);
|
| 80 |
+
padding: 25px;
|
| 81 |
+
box-shadow: var(--shadow);
|
| 82 |
+
animation: slideInLeft 0.6s ease-out;
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
.chat-section {
|
| 86 |
+
flex: 1.5;
|
| 87 |
+
display: flex;
|
| 88 |
+
flex-direction: column;
|
| 89 |
+
background: white;
|
| 90 |
+
border-radius: var(--border-radius);
|
| 91 |
+
box-shadow: var(--shadow);
|
| 92 |
+
overflow: hidden;
|
| 93 |
+
animation: slideInRight 0.6s ease-out;
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
.section-title {
|
| 97 |
+
font-size: 1.5rem;
|
| 98 |
+
margin-bottom: 20px;
|
| 99 |
+
color: var(--secondary);
|
| 100 |
+
display: flex;
|
| 101 |
+
align-items: center;
|
| 102 |
+
gap: 10px;
|
| 103 |
+
}
|
| 104 |
+
|
| 105 |
+
.section-title i {
|
| 106 |
+
background: var(--light-gray);
|
| 107 |
+
width: 40px;
|
| 108 |
+
height: 40px;
|
| 109 |
+
border-radius: 50%;
|
| 110 |
+
display: flex;
|
| 111 |
+
align-items: center;
|
| 112 |
+
justify-content: center;
|
| 113 |
+
}
|
| 114 |
+
|
| 115 |
+
.upload-area {
|
| 116 |
+
border: 2px dashed var(--light-gray);
|
| 117 |
+
border-radius: var(--border-radius);
|
| 118 |
+
padding: 30px;
|
| 119 |
+
text-align: center;
|
| 120 |
+
margin-bottom: 25px;
|
| 121 |
+
transition: var(--transition);
|
| 122 |
+
cursor: pointer;
|
| 123 |
+
}
|
| 124 |
+
|
| 125 |
+
.upload-area:hover {
|
| 126 |
+
border-color: var(--primary);
|
| 127 |
+
background: rgba(67, 97, 238, 0.05);
|
| 128 |
+
}
|
| 129 |
+
|
| 130 |
+
.upload-area i {
|
| 131 |
+
font-size: 3rem;
|
| 132 |
+
color: var(--primary);
|
| 133 |
+
margin-bottom: 15px;
|
| 134 |
+
}
|
| 135 |
+
|
| 136 |
+
.upload-area h3 {
|
| 137 |
+
margin-bottom: 10px;
|
| 138 |
+
color: var(--dark);
|
| 139 |
+
}
|
| 140 |
+
|
| 141 |
+
.upload-area p {
|
| 142 |
+
color: var(--gray);
|
| 143 |
+
margin-bottom: 20px;
|
| 144 |
+
}
|
| 145 |
+
|
| 146 |
+
.file-types {
|
| 147 |
+
display: flex;
|
| 148 |
+
justify-content: center;
|
| 149 |
+
gap: 15px;
|
| 150 |
+
margin-top: 15px;
|
| 151 |
+
}
|
| 152 |
+
|
| 153 |
+
.file-type {
|
| 154 |
+
background: var(--light-gray);
|
| 155 |
+
padding: 8px 15px;
|
| 156 |
+
border-radius: 30px;
|
| 157 |
+
font-size: 0.9rem;
|
| 158 |
+
}
|
| 159 |
+
|
| 160 |
+
.url-input {
|
| 161 |
+
margin-bottom: 25px;
|
| 162 |
+
}
|
| 163 |
+
|
| 164 |
+
.url-input label {
|
| 165 |
+
display: block;
|
| 166 |
+
margin-bottom: 8px;
|
| 167 |
+
font-weight: 500;
|
| 168 |
+
}
|
| 169 |
+
|
| 170 |
+
.url-input input {
|
| 171 |
+
width: 100%;
|
| 172 |
+
padding: 14px;
|
| 173 |
+
border: 1px solid var(--light-gray);
|
| 174 |
+
border-radius: var(--border-radius);
|
| 175 |
+
font-size: 1rem;
|
| 176 |
+
transition: var(--transition);
|
| 177 |
+
}
|
| 178 |
+
|
| 179 |
+
.url-input input:focus {
|
| 180 |
+
outline: none;
|
| 181 |
+
border-color: var(--primary);
|
| 182 |
+
box-shadow: 0 0 0 3px rgba(67, 97, 238, 0.2);
|
| 183 |
+
}
|
| 184 |
+
|
| 185 |
+
.btn {
|
| 186 |
+
background: var(--primary);
|
| 187 |
+
color: white;
|
| 188 |
+
border: none;
|
| 189 |
+
padding: 14px 25px;
|
| 190 |
+
border-radius: var(--border-radius);
|
| 191 |
+
font-size: 1rem;
|
| 192 |
+
font-weight: 600;
|
| 193 |
+
cursor: pointer;
|
| 194 |
+
transition: var(--transition);
|
| 195 |
+
display: inline-flex;
|
| 196 |
+
align-items: center;
|
| 197 |
+
justify-content: center;
|
| 198 |
+
gap: 8px;
|
| 199 |
+
}
|
| 200 |
+
|
| 201 |
+
.btn:hover {
|
| 202 |
+
background: var(--secondary);
|
| 203 |
+
transform: translateY(-2px);
|
| 204 |
+
}
|
| 205 |
+
|
| 206 |
+
.btn:active {
|
| 207 |
+
transform: translateY(0);
|
| 208 |
+
}
|
| 209 |
+
|
| 210 |
+
.btn-block {
|
| 211 |
+
width: 100%;
|
| 212 |
+
}
|
| 213 |
+
|
| 214 |
+
.btn-outline {
|
| 215 |
+
background: transparent;
|
| 216 |
+
border: 2px solid var(--primary);
|
| 217 |
+
color: var(--primary);
|
| 218 |
+
}
|
| 219 |
+
|
| 220 |
+
.btn-outline:hover {
|
| 221 |
+
background: var(--primary);
|
| 222 |
+
color: white;
|
| 223 |
+
}
|
| 224 |
+
|
| 225 |
+
.file-list {
|
| 226 |
+
margin-top: 25px;
|
| 227 |
+
}
|
| 228 |
+
|
| 229 |
+
.file-item {
|
| 230 |
+
display: flex;
|
| 231 |
+
align-items: center;
|
| 232 |
+
padding: 12px 15px;
|
| 233 |
+
background: var(--light-gray);
|
| 234 |
+
border-radius: var(--border-radius);
|
| 235 |
+
margin-bottom: 10px;
|
| 236 |
+
animation: fadeIn 0.3s ease-out;
|
| 237 |
+
}
|
| 238 |
+
|
| 239 |
+
.file-item i {
|
| 240 |
+
margin-right: 12px;
|
| 241 |
+
color: var(--primary);
|
| 242 |
+
}
|
| 243 |
+
|
| 244 |
+
.file-info {
|
| 245 |
+
flex: 1;
|
| 246 |
+
}
|
| 247 |
+
|
| 248 |
+
.file-name {
|
| 249 |
+
font-weight: 500;
|
| 250 |
+
margin-bottom: 3px;
|
| 251 |
+
}
|
| 252 |
+
|
| 253 |
+
.file-size {
|
| 254 |
+
font-size: 0.85rem;
|
| 255 |
+
color: var(--gray);
|
| 256 |
+
}
|
| 257 |
+
|
| 258 |
+
.file-actions {
|
| 259 |
+
display: flex;
|
| 260 |
+
gap: 10px;
|
| 261 |
+
}
|
| 262 |
+
|
| 263 |
+
.file-actions button {
|
| 264 |
+
background: none;
|
| 265 |
+
border: none;
|
| 266 |
+
color: var(--gray);
|
| 267 |
+
cursor: pointer;
|
| 268 |
+
font-size: 1.1rem;
|
| 269 |
+
transition: var(--transition);
|
| 270 |
+
}
|
| 271 |
+
|
| 272 |
+
.file-actions button:hover {
|
| 273 |
+
color: var(--danger);
|
| 274 |
+
}
|
| 275 |
+
|
| 276 |
+
.chat-header {
|
| 277 |
+
background: var(--primary);
|
| 278 |
+
color: white;
|
| 279 |
+
padding: 20px;
|
| 280 |
+
display: flex;
|
| 281 |
+
align-items: center;
|
| 282 |
+
gap: 15px;
|
| 283 |
+
}
|
| 284 |
+
|
| 285 |
+
.chat-header img {
|
| 286 |
+
width: 50px;
|
| 287 |
+
height: 50px;
|
| 288 |
+
border-radius: 50%;
|
| 289 |
+
background: white;
|
| 290 |
+
padding: 5px;
|
| 291 |
+
}
|
| 292 |
+
|
| 293 |
+
.chat-messages {
|
| 294 |
+
flex: 1;
|
| 295 |
+
padding: 25px;
|
| 296 |
+
overflow-y: auto;
|
| 297 |
+
display: flex;
|
| 298 |
+
flex-direction: column;
|
| 299 |
+
gap: 20px;
|
| 300 |
+
background: #f8fafc;
|
| 301 |
+
}
|
| 302 |
+
|
| 303 |
+
.message {
|
| 304 |
+
max-width: 80%;
|
| 305 |
+
padding: 18px;
|
| 306 |
+
border-radius: var(--border-radius);
|
| 307 |
+
animation: fadeIn 0.3s ease-out;
|
| 308 |
+
position: relative;
|
| 309 |
+
box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05);
|
| 310 |
+
}
|
| 311 |
+
|
| 312 |
+
.user-message {
|
| 313 |
+
background: var(--primary-light);
|
| 314 |
+
color: white;
|
| 315 |
+
align-self: flex-end;
|
| 316 |
+
border-bottom-right-radius: 5px;
|
| 317 |
+
}
|
| 318 |
+
|
| 319 |
+
.bot-message {
|
| 320 |
+
background: white;
|
| 321 |
+
border: 1px solid var(--light-gray);
|
| 322 |
+
align-self: flex-start;
|
| 323 |
+
border-bottom-left-radius: 5px;
|
| 324 |
+
}
|
| 325 |
+
|
| 326 |
+
.message-header {
|
| 327 |
+
display: flex;
|
| 328 |
+
align-items: center;
|
| 329 |
+
margin-bottom: 8px;
|
| 330 |
+
font-weight: 600;
|
| 331 |
+
}
|
| 332 |
+
|
| 333 |
+
.message-header i {
|
| 334 |
+
margin-right: 8px;
|
| 335 |
+
}
|
| 336 |
+
|
| 337 |
+
.message-content {
|
| 338 |
+
line-height: 1.5;
|
| 339 |
+
}
|
| 340 |
+
|
| 341 |
+
.typing-indicator {
|
| 342 |
+
display: flex;
|
| 343 |
+
align-items: center;
|
| 344 |
+
padding: 18px;
|
| 345 |
+
background: white;
|
| 346 |
+
border: 1px solid var(--light-gray);
|
| 347 |
+
border-radius: var(--border-radius);
|
| 348 |
+
align-self: flex-start;
|
| 349 |
+
border-bottom-left-radius: 5px;
|
| 350 |
+
width: 100px;
|
| 351 |
+
}
|
| 352 |
+
|
| 353 |
+
.typing-dot {
|
| 354 |
+
width: 8px;
|
| 355 |
+
height: 8px;
|
| 356 |
+
background: var(--gray);
|
| 357 |
+
border-radius: 50%;
|
| 358 |
+
margin: 0 3px;
|
| 359 |
+
animation: typing 1.4s infinite ease-in-out;
|
| 360 |
+
}
|
| 361 |
+
|
| 362 |
+
.typing-dot:nth-child(1) { animation-delay: 0s; }
|
| 363 |
+
.typing-dot:nth-child(2) { animation-delay: 0.2s; }
|
| 364 |
+
.typing-dot:nth-child(3) { animation-delay: 0.4s; }
|
| 365 |
+
|
| 366 |
+
.chat-input {
|
| 367 |
+
display: flex;
|
| 368 |
+
padding: 20px;
|
| 369 |
+
background: white;
|
| 370 |
+
border-top: 1px solid var(--light-gray);
|
| 371 |
+
}
|
| 372 |
+
|
| 373 |
+
.chat-input input {
|
| 374 |
+
flex: 1;
|
| 375 |
+
padding: 16px;
|
| 376 |
+
border: 1px solid var(--light-gray);
|
| 377 |
+
border-radius: 30px;
|
| 378 |
+
font-size: 1rem;
|
| 379 |
+
transition: var(--transition);
|
| 380 |
+
}
|
| 381 |
+
|
| 382 |
+
.chat-input input:focus {
|
| 383 |
+
outline: none;
|
| 384 |
+
border-color: var(--primary);
|
| 385 |
+
box-shadow: 0 0 0 3px rgba(67, 97, 238, 0.2);
|
| 386 |
+
}
|
| 387 |
+
|
| 388 |
+
.chat-input button {
|
| 389 |
+
background: var(--primary);
|
| 390 |
+
color: white;
|
| 391 |
+
border: none;
|
| 392 |
+
width: 50px;
|
| 393 |
+
height: 50px;
|
| 394 |
+
border-radius: 50%;
|
| 395 |
+
margin-left: 15px;
|
| 396 |
+
cursor: pointer;
|
| 397 |
+
transition: var(--transition);
|
| 398 |
+
display: flex;
|
| 399 |
+
align-items: center;
|
| 400 |
+
justify-content: center;
|
| 401 |
+
}
|
| 402 |
+
|
| 403 |
+
.chat-input button:hover {
|
| 404 |
+
background: var(--secondary);
|
| 405 |
+
transform: scale(1.05);
|
| 406 |
+
}
|
| 407 |
+
|
| 408 |
+
|
| 409 |
+
|
| 410 |
+
@keyframes fadeIn {
|
| 411 |
+
from { opacity: 0; transform: translateY(10px); }
|
| 412 |
+
to { opacity: 1; transform: translateY(0); }
|
| 413 |
+
}
|
| 414 |
+
|
| 415 |
+
@keyframes slideInLeft {
|
| 416 |
+
from { opacity: 0; transform: translateX(-30px); }
|
| 417 |
+
to { opacity: 1; transform: translateX(0); }
|
| 418 |
+
}
|
| 419 |
+
|
| 420 |
+
@keyframes slideInRight {
|
| 421 |
+
from { opacity: 0; transform: translateX(30px); }
|
| 422 |
+
to { opacity: 1; transform: translateX(0); }
|
| 423 |
+
}
|
| 424 |
+
|
| 425 |
+
@keyframes typing {
|
| 426 |
+
0%, 60%, 100% { transform: translateY(0); }
|
| 427 |
+
30% { transform: translateY(-5px); }
|
| 428 |
+
}
|
| 429 |
+
|
| 430 |
+
.processing {
|
| 431 |
+
display: flex;
|
| 432 |
+
align-items: center;
|
| 433 |
+
justify-content: center;
|
| 434 |
+
padding: 30px;
|
| 435 |
+
color: var(--gray);
|
| 436 |
+
}
|
| 437 |
+
|
| 438 |
+
.processing i {
|
| 439 |
+
font-size: 2rem;
|
| 440 |
+
margin-right: 15px;
|
| 441 |
+
color: var(--primary);
|
| 442 |
+
animation: spin 1.5s linear infinite;
|
| 443 |
+
}
|
| 444 |
+
|
| 445 |
+
@keyframes spin {
|
| 446 |
+
0% { transform: rotate(0deg); }
|
| 447 |
+
100% { transform: rotate(360deg); }
|
| 448 |
+
}
|
| 449 |
+
|
| 450 |
+
footer {
|
| 451 |
+
text-align: center;
|
| 452 |
+
padding: 30px 0;
|
| 453 |
+
color: var(--gray);
|
| 454 |
+
font-size: 0.9rem;
|
| 455 |
+
margin-top: auto;
|
| 456 |
+
}
|
| 457 |
+
|
| 458 |
+
.pulse {
|
| 459 |
+
animation: pulse 2s infinite;
|
| 460 |
+
}
|
| 461 |
+
|
| 462 |
+
@keyframes pulse {
|
| 463 |
+
0% { box-shadow: 0 0 0 0 rgba(67, 97, 238, 0.4); }
|
| 464 |
+
70% { box-shadow: 0 0 0 10px rgba(67, 97, 238, 0); }
|
| 465 |
+
100% { box-shadow: 0 0 0 0 rgba(67, 97, 238, 0); }
|
| 466 |
+
}
|
frontend/index.html
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>ChatWithDoc - Chat with Your Documents</title>
    <link rel="stylesheet" href="css/styles.css">
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
</head>
<body>
    <div class="container">
        <header>
            <h1><i class="fas fa-robot"></i> ChatWithDoc</h1>
            <p>Upload documents or enter URLs, then chat with your content using AI</p>
        </header>

        <div class="app-container">
            <div class="input-section">
                <h2 class="section-title"><i class="fas fa-file-upload"></i> Upload Documents</h2>

                <div class="upload-area" id="uploadArea">
                    <i class="fas fa-cloud-upload-alt"></i>
                    <h3>Drag &amp; Drop Files Here</h3>
                    <p>Supports PDF, DOC, DOCX, TXT files</p>
                    <button class="btn btn-outline">Browse Files</button>
                    <input type="file" id="fileInput" multiple accept=".pdf,.doc,.docx,.txt" style="display: none;">

                    <div class="file-types">
                        <div class="file-type">PDF</div>
                        <div class="file-type">DOC</div>
                        <div class="file-type">DOCX</div>
                        <div class="file-type">TXT</div>
                    </div>
                </div>

                <div class="file-list" id="fileList">
                    <!-- File items will be added here dynamically -->
                </div>

                <div class="url-input">
                    <label for="urlInput"><i class="fas fa-link"></i> Or Enter a Web Page URL</label>
                    <input type="url" id="urlInput" placeholder="https://example.com/article">
                </div>

                <button class="btn btn-block pulse" id="processBtn">
                    <i class="fas fa-cogs"></i> Process Documents &amp; URLs
                </button>
            </div>

            <div class="chat-section">
                <div class="chat-header">
                    <img src="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'%3E%3Cpath fill='%234361ee' d='M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm-1 15h-2v-6h2v6zm3 0h-2v-6h2v6zm3 0h-2v-6h2v6z'/%3E%3C/svg%3E" alt="AI Assistant">
                    <div>
                        <h2>ChatWithDoc Assistant</h2>
                        <p>Ask me anything about your documents</p>
                    </div>
                </div>

                <div class="chat-messages" id="chatMessages">
                    <div class="message bot-message">
                        <div class="message-header">
                            <i class="fas fa-robot"></i> ChatWithDoc Assistant
                        </div>
                        <div class="message-content">
                            Hello! I'm your document assistant. Upload some documents or enter URLs, then ask me anything about their content. I'll help you find answers quickly.
                        </div>
                    </div>
                </div>

                <div class="chat-input">
                    <input type="text" id="messageInput" placeholder="Ask about your documents...">
                    <button id="sendButton">
                        <i class="fas fa-paper-plane"></i>
                    </button>
                </div>
            </div>
        </div>

        <footer>
            <p>ChatWithDoc - Chat with your documents using AI</p>
        </footer>
    </div>

    <!-- FIX: js/main.js calls marked.parse() and DOMPurify.sanitize() in
         addBotMessage(); these libraries were never loaded, which made every
         bot reply throw a ReferenceError. Load them before main.js. -->
    <script src="https://cdnjs.cloudflare.com/ajax/libs/marked/9.1.6/marked.min.js"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/dompurify/3.0.6/purify.min.js"></script>
    <script src="js/main.js"></script>
</body>
</html>
|
frontend/js/main.js
ADDED
|
@@ -0,0 +1,493 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// DOM element handles, resolved once when the script loads.
const uploadArea = document.getElementById('uploadArea');
const fileInput = document.getElementById('fileInput');
const urlInput = document.getElementById('urlInput');
const processBtn = document.getElementById('processBtn');
const fileList = document.getElementById('fileList');
const chatMessages = document.getElementById('chatMessages');
const messageInput = document.getElementById('messageInput');
const sendButton = document.getElementById('sendButton');

// API Base URL (same origin as the served frontend).
const API_BASE = '/';

console.log('JavaScript loaded successfully');

// Event Listeners
uploadArea.addEventListener('click', () => {
    console.log('Upload area clicked');
    fileInput.click();
});

fileInput.addEventListener('change', (e) => {
    console.log('File input changed');
    const files = e.target.files;
    console.log('Files detected:', files.length);

    if (files.length > 0) {
        console.log('Files selected:', files.length);

        // Clear previous documents and UI first.
        clearPreviousDocuments();

        // Snapshot the live FileList before anything can reset the input.
        const fileArray = Array.from(files);
        console.log('Files stored in array:', fileArray.length);

        fileArray.forEach((file, index) => {
            console.log(`Processing file ${index + 1}:`, file.name, 'Type:', file.type);
            uploadFile(file);
        });
    } else {
        console.log('No files detected in change event');
    }
});

processBtn.addEventListener('click', () => {
    console.log('Process button clicked');
    processAllDocuments();
});

sendButton.addEventListener('click', () => {
    console.log('Send button clicked');
    sendMessage();
});

// FIX: 'keypress' is deprecated; 'keydown' fires reliably for Enter in all
// current browsers and the e.key check keeps the behavior identical.
messageInput.addEventListener('keydown', (e) => {
    if (e.key === 'Enter') {
        console.log('Enter key pressed');
        sendMessage();
    }
});
|
| 63 |
+
|
| 64 |
+
// Separate function for clearing previous documents (doesn't clear current input)
|
| 65 |
+
// Reset the UI (file list, URL field, chat) and ask the backend to drop any
// previously processed documents. Deliberately does NOT touch fileInput.value,
// so a just-selected FileList survives the reset.
function clearPreviousDocuments() {
    console.log('Clearing previous documents');

    fileList.innerHTML = '';
    urlInput.value = '';

    chatMessages.innerHTML = `
        <div class="message bot-message">
            <div class="message-header">
                <i class="fas fa-robot"></i> ChatWithDoc Assistant
            </div>
            <div class="message-content">
                Previous documents cleared. Ready for new uploads!
            </div>
        </div>
    `;

    // Fire-and-forget backend reset; failures are only logged.
    fetch(`${API_BASE}clear-documents`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' }
    })
        .then((response) => {
            console.log('Clear documents response status:', response.status);
            return response.json();
        })
        .then((data) => {
            console.log('Previous documents cleared:', data);
        })
        .catch((error) => {
            console.error('Error clearing documents:', error);
        });
}
|
| 104 |
+
|
| 105 |
+
// Function for complete reset (used by clear button if you add one)
|
| 106 |
+
// Full reset: also clears the native file input, then delegates the rest of
// the cleanup to clearPreviousDocuments(). Intended for an explicit
// "clear everything" action.
function clearAllFilesSync() {
    console.log('Clearing all files completely');
    fileInput.value = '';
    clearPreviousDocuments();
}
|
| 115 |
+
|
| 116 |
+
// POST a single File to the backend /upload endpoint as multipart form data,
// reflecting upload progress in the file-list UI.
function uploadFile(file) {
    console.log('Starting file upload for:', file.name);

    const payload = new FormData();
    payload.append('file', file);

    // Show the entry immediately with a spinner while the request runs.
    addFileToList(file.name, formatFileSize(file.size), 'uploading');

    console.log('Making fetch request to:', `${API_BASE}upload`);

    fetch(`${API_BASE}upload`, { method: 'POST', body: payload })
        .then((response) => {
            console.log('Upload response status:', response.status);
            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }
            return response.json();
        })
        .then((data) => {
            console.log('Upload response data:', data);
            if (data.error) {
                updateFileStatus(file.name, 'error');
                alert('Error uploading file: ' + data.error);
            } else {
                updateFileStatus(file.name, 'uploaded');
                console.log('File uploaded successfully:', file.name);
            }
        })
        .catch((error) => {
            console.error('Upload error:', error);
            updateFileStatus(file.name, 'error');
            alert('Error uploading file: ' + error.message);
        });
}
|
| 154 |
+
|
| 155 |
+
// Kick off backend processing for every uploaded file and/or the entered URL,
// then report combined success or failure in the chat panel.
function processAllDocuments() {
    console.log('Processing all documents');

    const url = urlInput.value.trim();
    const files = document.querySelectorAll('.file-item');

    console.log('URL:', url, 'Files count:', files.length);

    if (files.length === 0 && !url) {
        alert('Please upload files or enter a URL first');
        return;
    }

    // Show processing animation
    showProcessing();

    // Process all uploaded files
    let filePromise = Promise.resolve();
    if (files.length > 0) {
        // Update status to processing
        files.forEach(fileItem => {
            updateFileStatus(fileItem.dataset.filename, 'processing');
        });

        console.log('Calling process-documents endpoint');

        filePromise = fetch(`${API_BASE}process-documents`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' }
        })
        .then(response => {
            console.log('Process documents response status:', response.status);
            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }
            return response.json();
        })
        .then(data => {
            console.log('Process documents response:', data);
            if (data.error) {
                throw new Error(data.error);
            }
            // Mark all files as processed
            files.forEach(fileItem => {
                updateFileStatus(fileItem.dataset.filename, 'processed');
            });
            addBotMessage(`Successfully processed ${data.processed_count} files!`);
            return data;
        });
    }

    // Process URL if provided
    let urlPromise = Promise.resolve();
    if (url) {
        console.log('Processing URL:', url);

        urlPromise = fetch(`${API_BASE}process-url`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ url: url })
        })
        .then(response => {
            console.log('Process URL response status:', response.status);
            // FIX: the file branch checked response.ok but this branch did not,
            // so HTTP errors surfaced as confusing JSON-parse failures.
            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }
            return response.json();
        })
        .then(data => {
            console.log('Process URL response:', data);
            if (data.error) {
                throw new Error(data.error);
            }
            addBotMessage(`URL processed successfully! Found ${data.document_info.num_pages} pages with ${data.document_info.num_chunks} text chunks.`);
            return data;
        });
    }

    // Wait for all processing to complete
    Promise.all([filePromise, urlPromise])
        .then(() => {
            console.log('All processing completed');
            hideProcessing();
            addBotMessage("All documents and URLs have been processed successfully! You can now ask questions about them.");
        })
        .catch(error => {
            console.error('Processing error:', error);
            hideProcessing();
            alert('Error processing documents: ' + error.message);
        });
}
|
| 249 |
+
|
| 250 |
+
// Send the current chat input to the backend /chat endpoint and append the
// assistant's reply (or an error notice) to the conversation.
function sendMessage() {
    const message = messageInput.value.trim();
    console.log('Sending message:', message);

    // Guard clause: ignore empty input.
    if (!message) {
        return;
    }

    addUserMessage(message);
    messageInput.value = '';

    // Show typing indicator while waiting for the reply.
    showTypingIndicator();

    fetch(`${API_BASE}chat`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ message: message })
    })
        .then((response) => {
            console.log('Chat response status:', response.status);
            return response.json();
        })
        .then((data) => {
            console.log('Chat response:', data);
            hideTypingIndicator();
            if (data.error) {
                addBotMessage("Sorry, I encountered an error: " + data.error);
            } else {
                addBotMessage(data.response);
            }
        })
        .catch((error) => {
            console.error('Chat error:', error);
            hideTypingIndicator();
            addBotMessage("Sorry, I encountered an error processing your request.");
        });
}
|
| 288 |
+
|
| 289 |
+
// Append a file entry (icon + name + size + remove button) to the file list.
// `status` selects the leading icon; unknown statuses fall back to a plain
// file icon.
function addFileToList(name, size, status = 'success') {
    console.log('Adding file to list:', name, 'Status:', status);

    const iconFor = {
        uploading:  '<i class="fas fa-spinner fa-spin"></i>',
        processing: '<i class="fas fa-cog fa-spin"></i>',
        error:      '<i class="fas fa-exclamation-circle" style="color: var(--danger);"></i>',
        processed:  '<i class="fas fa-check-circle" style="color: var(--success);"></i>',
        uploaded:   '<i class="fas fa-file-alt"></i>'
    };
    const statusIcon = iconFor[status] || '<i class="fas fa-file-alt"></i>';

    const fileItem = document.createElement('div');
    fileItem.className = 'file-item';
    fileItem.dataset.filename = name;
    fileItem.innerHTML = `
        ${statusIcon}
        <div class="file-info">
            <div class="file-name">${name}</div>
            <div class="file-size">${size}</div>
        </div>
        <div class="file-actions">
            <button title="Remove"><i class="fas fa-times"></i></button>
        </div>
    `;

    fileList.appendChild(fileItem);

    // Wire up the remove button for this entry.
    fileItem.querySelector('.file-actions button').addEventListener('click', () => {
        console.log('Removing file:', name);
        fileItem.remove();
    });
}
|
| 330 |
+
|
| 331 |
+
// Swap the leading status icon of every file-list entry whose stored filename
// matches `name`. Unknown statuses fall back to the plain file icon.
function updateFileStatus(name, status) {
    console.log('Updating file status:', name, 'to', status);

    const iconFor = {
        uploading:  '<i class="fas fa-spinner fa-spin"></i>',
        processing: '<i class="fas fa-cog fa-spin"></i>',
        error:      '<i class="fas fa-exclamation-circle" style="color: var(--danger);"></i>',
        processed:  '<i class="fas fa-check-circle" style="color: var(--success);"></i>',
        uploaded:   '<i class="fas fa-file-alt"></i>'
    };
    const statusIcon = iconFor[status] || '<i class="fas fa-file-alt"></i>';

    for (const item of document.querySelectorAll('.file-item')) {
        if (item.dataset.filename !== name) {
            continue;
        }
        // Replace the first <i> in the entry, which is the status icon.
        const iconElement = item.querySelector('i');
        if (iconElement) {
            iconElement.outerHTML = statusIcon;
        }
    }
}
|
| 358 |
+
|
| 359 |
+
// Render a byte count as a human-readable size with up to two decimals,
// e.g. 1536 -> "1.5 KB".
function formatFileSize(bytes) {
    if (bytes === 0) return '0 Bytes';
    const units = ['Bytes', 'KB', 'MB', 'GB'];
    const exponent = Math.floor(Math.log(bytes) / Math.log(1024));
    const value = parseFloat((bytes / Math.pow(1024, exponent)).toFixed(2));
    return `${value} ${units[exponent]}`;
}
|
| 366 |
+
|
| 367 |
+
// Append the user's message to the chat panel.
// SECURITY FIX: `text` comes straight from user input; the original injected
// it via innerHTML, allowing arbitrary HTML/script markup to be rendered
// (DOM XSS). The message body is now set with textContent.
function addUserMessage(text) {
    const messageDiv = document.createElement('div');
    messageDiv.className = 'message user-message';

    const header = document.createElement('div');
    header.className = 'message-header';
    header.innerHTML = '<i class="fas fa-user"></i> You';

    const content = document.createElement('div');
    content.className = 'message-content';
    content.textContent = text; // never interpreted as HTML

    messageDiv.appendChild(header);
    messageDiv.appendChild(content);
    chatMessages.appendChild(messageDiv);
    chatMessages.scrollTop = chatMessages.scrollHeight;
}
|
| 379 |
+
|
| 380 |
+
// Append an assistant message, rendering markdown when possible.
// FIX: the original called marked.parse()/DOMPurify.sanitize() unconditionally,
// but those globals may not be loaded on the page, which made every bot reply
// throw a ReferenceError. Fall back to safely-escaped plain text when either
// library is missing.
function addBotMessage(text) {
    const messageDiv = document.createElement('div');
    messageDiv.className = 'message bot-message';

    let rendered;
    if (typeof marked !== 'undefined' && typeof DOMPurify !== 'undefined') {
        // Convert markdown to sanitized HTML.
        rendered = DOMPurify.sanitize(marked.parse(text));
    } else {
        // Escape by round-tripping through textContent.
        const escaper = document.createElement('div');
        escaper.textContent = text;
        rendered = escaper.innerHTML;
    }

    messageDiv.innerHTML = `
        <div class="message-header">
            <i class="fas fa-robot"></i> ChatWithDoc Assistant
        </div>
        <div class="message-content">${rendered}</div>
    `;
    chatMessages.appendChild(messageDiv);
    chatMessages.scrollTop = chatMessages.scrollHeight;
}
|
| 394 |
+
|
| 395 |
+
// Insert the animated three-dot "assistant is typing" indicator at the bottom
// of the chat panel.
function showTypingIndicator() {
    const indicator = document.createElement('div');
    indicator.className = 'typing-indicator';
    indicator.id = 'typingIndicator';
    indicator.innerHTML = `
        <div class="typing-dot"></div>
        <div class="typing-dot"></div>
        <div class="typing-dot"></div>
    `;
    chatMessages.appendChild(indicator);
    chatMessages.scrollTop = chatMessages.scrollHeight;
}
|
| 407 |
+
|
| 408 |
+
// Remove the typing indicator, if one is currently shown.
function hideTypingIndicator() {
    const indicator = document.getElementById('typingIndicator');
    if (indicator !== null) {
        indicator.remove();
    }
}
|
| 414 |
+
|
| 415 |
+
// Replace the chat panel's contents with a spinner while documents/URLs are
// being processed. hideProcessing() restores the panel afterwards.
function showProcessing() {
    console.log('Showing processing indicator');

    const banner = document.createElement('div');
    banner.className = 'processing';
    banner.id = 'processingIndicator';
    banner.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Processing documents and URLs...';

    chatMessages.innerHTML = '';
    chatMessages.appendChild(banner);
}
|
| 426 |
+
|
| 427 |
+
// Remove the processing spinner and restore the chat panel's greeting.
function hideProcessing() {
    console.log('Hiding processing indicator');

    const banner = document.getElementById('processingIndicator');
    if (banner !== null) {
        banner.remove();
    }

    chatMessages.innerHTML = `
        <div class="message bot-message">
            <div class="message-header">
                <i class="fas fa-robot"></i> ChatWithDoc Assistant
            </div>
            <div class="message-content">
                Hello! I'm your document assistant. Upload some documents or enter URLs, then ask me anything about their content. I'll help you find answers quickly.
            </div>
        </div>
    `;
}
|
| 446 |
+
|
| 447 |
+
// Drag and drop functionality
uploadArea.addEventListener('dragover', (e) => {
    e.preventDefault();
    console.log('Drag over upload area');
    uploadArea.style.borderColor = 'var(--primary)';
    uploadArea.style.backgroundColor = 'rgba(67, 97, 238, 0.1)';
});

uploadArea.addEventListener('dragleave', () => {
    console.log('Drag leave upload area');
    uploadArea.style.borderColor = 'var(--light-gray)';
    uploadArea.style.backgroundColor = '';
});

uploadArea.addEventListener('drop', (e) => {
    e.preventDefault();
    console.log('Files dropped on upload area');
    uploadArea.style.borderColor = 'var(--light-gray)';
    uploadArea.style.backgroundColor = '';

    const files = e.dataTransfer.files;
    console.log('Dropped files count:', files.length);

    if (files.length === 0) {
        return;
    }

    // Clear previous documents first
    clearPreviousDocuments();

    const ACCEPTED_TYPES = new Set([
        'application/pdf',
        'application/msword',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        'text/plain'
    ]);
    const ACCEPTED_EXTENSIONS = ['.pdf', '.doc', '.docx', '.txt'];

    Array.from(files).forEach((file) => {
        console.log('Processing dropped file:', file.name, 'Type:', file.type);
        // FIX: browsers often report an empty or nonstandard MIME type for
        // dropped .doc/.txt files, so the original type-only check rejected
        // valid files. Fall back to the file extension (matching the
        // <input accept> list) before giving up.
        const lowerName = file.name.toLowerCase();
        const typeOk = ACCEPTED_TYPES.has(file.type);
        const extensionOk = ACCEPTED_EXTENSIONS.some((ext) => lowerName.endsWith(ext));
        if (typeOk || extensionOk) {
            uploadFile(file);
        } else {
            console.log('Unsupported file type:', file.type);
            alert(`Unsupported file type: ${file.type}. Please upload PDF, DOC, DOCX, or TXT files.`);
        }
    });
});

console.log('All event listeners attached successfully');
|
main.py
ADDED
|
@@ -0,0 +1,269 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, File, UploadFile
from fastapi.responses import JSONResponse, FileResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
import shutil
import os
from pydantic import BaseModel, Field
from typing import Dict, Any, List
from task_manager import DocumentManager
import warnings

# Silence LangSmith / API-key warnings emitted by LangChain internals.
warnings.filterwarnings("ignore", message=".*LangSmith.*")
warnings.filterwarnings("ignore", message=".*API key.*")

# Identify ourselves so LangChain does not warn about a default UserAgent.
os.environ["LANGCHAIN_USER_AGENT"] = "ChatWithDoc/1.0"

app = FastAPI()

# Single shared document manager used by every endpoint.
doc_manager = DocumentManager()

# Files accepted by /upload, waiting for a /process-documents call.
uploaded_files = []


class UploadResponse(BaseModel):
    """Confirmation payload returned by /upload."""
    message: str
    document_info: Dict[str, Any]


class URLRequest(BaseModel):
    """Request body for /process-url."""
    url: str = Field(..., description="URL of the document to process")


class ChatRequest(BaseModel):
    """Request body for /chat."""
    message: str = Field(..., description="User's question")


class ChatResponse(BaseModel):
    """Response body for /chat."""
    response: str = Field(..., description="Answer to the user's question")


class ProcessResponse(BaseModel):
    """Summary of a /process-documents run."""
    message: str
    processed_count: int
    errors: List[str] = []  # pydantic deep-copies this default per instance


# Allow CORS (update this with your frontend URL in production)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Change to your React frontend URL in prod
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Directory where uploaded documents are stored before processing.
UPLOAD_DIR = "uploaded_files"
os.makedirs(UPLOAD_DIR, exist_ok=True)
|
| 56 |
+
|
| 57 |
+
@app.post("/upload")
async def upload_file(file: UploadFile = File(...)):
    """
    Upload a document (just stores it, doesn't process yet)

    - **file**: The document file to upload (PDF, DOCX, TXT)
    - Returns upload confirmation
    """
    print(f"Received file: {file.filename} of type {file.content_type}")

    # SECURITY FIX: the client fully controls `filename`; strip any directory
    # components so a name such as "../../etc/passwd" cannot escape UPLOAD_DIR.
    safe_filename = os.path.basename(file.filename or "")
    if not safe_filename:
        return JSONResponse(status_code=400, content={"error": "Missing filename"})

    # Get file extension and determine content type
    file_extension = safe_filename.lower().rsplit('.', 1)[-1] if '.' in safe_filename else ''

    # Map file extensions to content types
    extension_to_type = {
        'pdf': 'application/pdf',
        'txt': 'text/plain',
        'doc': 'application/msword',
        'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
    }

    # Trust the declared content type only when it is one we support;
    # otherwise fall back to the extension (browsers often send odd types).
    supported_types = {
        "application/pdf",
        "text/plain",
        "application/msword",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    }
    if file.content_type and file.content_type in supported_types:
        content_type = file.content_type
    elif file_extension in extension_to_type:
        content_type = extension_to_type[file_extension]
    else:
        return JSONResponse(status_code=400, content={"error": f"Unsupported file type: {file_extension}"})

    print(f"Using content type: {content_type}")

    file_location = os.path.join(UPLOAD_DIR, safe_filename)

    # Save the file to disk.
    with open(file_location, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)

    # Remember the file so /process-documents can pick it up later.
    file_info = {
        "filename": safe_filename,
        "file_location": file_location,
        "content_type": content_type
    }
    uploaded_files.append(file_info)

    print("File uploaded successfully, ready for processing")
    return UploadResponse(
        message="File uploaded successfully",
        document_info={
            "filename": safe_filename,
            "content_type": content_type,
            "status": "uploaded",
            "location": file_location
        }
    )
|
| 113 |
+
|
| 114 |
+
@app.post("/process-documents")
|
| 115 |
+
async def process_documents():
|
| 116 |
+
"""
|
| 117 |
+
Process all uploaded files using your processor architecture
|
| 118 |
+
"""
|
| 119 |
+
try:
|
| 120 |
+
if not uploaded_files:
|
| 121 |
+
return JSONResponse(status_code=400, content={"error": "No files uploaded"})
|
| 122 |
+
|
| 123 |
+
processed_count = 0
|
| 124 |
+
errors = []
|
| 125 |
+
|
| 126 |
+
# Process each uploaded file
|
| 127 |
+
for file_info in uploaded_files:
|
| 128 |
+
try:
|
| 129 |
+
result = doc_manager.process_document(file_info["file_location"], file_info["content_type"])
|
| 130 |
+
|
| 131 |
+
if result["status"] == "success":
|
| 132 |
+
processed_count += 1
|
| 133 |
+
print(f"Successfully processed: {file_info['filename']}")
|
| 134 |
+
else:
|
| 135 |
+
error_msg = f"{file_info['filename']}: {result['message']}"
|
| 136 |
+
errors.append(error_msg)
|
| 137 |
+
print(f"Failed to process {file_info['filename']}: {result['message']}")
|
| 138 |
+
|
| 139 |
+
except Exception as e:
|
| 140 |
+
error_msg = f"{file_info['filename']}: {str(e)}"
|
| 141 |
+
errors.append(error_msg)
|
| 142 |
+
print(f"Exception processing {file_info['filename']}: {e}")
|
| 143 |
+
|
| 144 |
+
# Clear uploaded files list after processing attempt
|
| 145 |
+
uploaded_files.clear()
|
| 146 |
+
|
| 147 |
+
if processed_count == 0:
|
| 148 |
+
return JSONResponse(status_code=400, content={
|
| 149 |
+
"error": f"Failed to process any files. Errors: {'; '.join(errors)}"
|
| 150 |
+
})
|
| 151 |
+
|
| 152 |
+
response_message = f"Successfully processed {processed_count} files"
|
| 153 |
+
if errors:
|
| 154 |
+
response_message += f". {len(errors)} files had errors."
|
| 155 |
+
|
| 156 |
+
return ProcessResponse(
|
| 157 |
+
message=response_message,
|
| 158 |
+
processed_count=processed_count,
|
| 159 |
+
errors=errors
|
| 160 |
+
)
|
| 161 |
+
|
| 162 |
+
except Exception as e:
|
| 163 |
+
return JSONResponse(status_code=500, content={"error": str(e)})
|
| 164 |
+
|
| 165 |
+
@app.post("/process-url")
|
| 166 |
+
async def process_url(url_request: URLRequest):
|
| 167 |
+
"""
|
| 168 |
+
Process a document from URL using your web processor
|
| 169 |
+
|
| 170 |
+
- **url**: The URL of the document to process
|
| 171 |
+
- Returns document processing information
|
| 172 |
+
"""
|
| 173 |
+
url = url_request.url
|
| 174 |
+
|
| 175 |
+
try:
|
| 176 |
+
# Process the URL using your web processor
|
| 177 |
+
result = doc_manager.process_url(url)
|
| 178 |
+
print("URL processing result:", result)
|
| 179 |
+
|
| 180 |
+
if result["status"] == "error":
|
| 181 |
+
return JSONResponse(status_code=400, content={"error": result["message"]})
|
| 182 |
+
|
| 183 |
+
return UploadResponse(
|
| 184 |
+
message="URL processed successfully",
|
| 185 |
+
document_info={
|
| 186 |
+
"url": url,
|
| 187 |
+
"status": "processed",
|
| 188 |
+
"type": "url",
|
| 189 |
+
"title": result.get("title", "Untitled"),
|
| 190 |
+
"num_pages": result.get("num_pages", 0),
|
| 191 |
+
"num_chunks": result.get("num_chunks", 0),
|
| 192 |
+
"word_count": result.get("word_count", 0)
|
| 193 |
+
}
|
| 194 |
+
)
|
| 195 |
+
|
| 196 |
+
except Exception as e:
|
| 197 |
+
return JSONResponse(status_code=500, content={"error": str(e)})
|
| 198 |
+
|
| 199 |
+
@app.post("/chat")
|
| 200 |
+
async def chat_with_doc(chat_request: ChatRequest):
|
| 201 |
+
"""
|
| 202 |
+
Process a query against processed documents using your processors
|
| 203 |
+
|
| 204 |
+
- **query**: The user's question
|
| 205 |
+
- Returns an answer
|
| 206 |
+
"""
|
| 207 |
+
try:
|
| 208 |
+
print(f"Received query: {chat_request.message}")
|
| 209 |
+
result = doc_manager.query_document(chat_request.message)
|
| 210 |
+
print("Query result:", result)
|
| 211 |
+
|
| 212 |
+
if result["status"] == "error":
|
| 213 |
+
return JSONResponse(status_code=400, content={"error": result["message"]})
|
| 214 |
+
|
| 215 |
+
return ChatResponse(
|
| 216 |
+
response=result["answer"]
|
| 217 |
+
)
|
| 218 |
+
except Exception as e:
|
| 219 |
+
return JSONResponse(status_code=500, content={"error": str(e)})
|
| 220 |
+
|
| 221 |
+
@app.post("/clear-documents")
|
| 222 |
+
async def clear_documents():
|
| 223 |
+
"""
|
| 224 |
+
Clear all previously processed documents and uploaded files
|
| 225 |
+
"""
|
| 226 |
+
print("Clearing all documents...")
|
| 227 |
+
try:
|
| 228 |
+
doc_manager.clear_documents()
|
| 229 |
+
uploaded_files.clear()
|
| 230 |
+
return {"message": "Documents cleared successfully"}
|
| 231 |
+
except Exception as e:
|
| 232 |
+
return JSONResponse(status_code=500, content={"error": str(e)})
|
| 233 |
+
|
| 234 |
+
@app.get("/status")
|
| 235 |
+
async def get_status():
|
| 236 |
+
"""
|
| 237 |
+
Get current status of uploaded and processed documents
|
| 238 |
+
"""
|
| 239 |
+
try:
|
| 240 |
+
# Get status from your document manager if it has a get_status method
|
| 241 |
+
if hasattr(doc_manager, 'get_status'):
|
| 242 |
+
doc_status = doc_manager.get_status()
|
| 243 |
+
else:
|
| 244 |
+
# Fallback for original single-document architecture
|
| 245 |
+
doc_status = {
|
| 246 |
+
"total_documents": 1 if hasattr(doc_manager, 'current_processor') and doc_manager.current_processor else 0,
|
| 247 |
+
"current_document": getattr(doc_manager, 'current_document', None)
|
| 248 |
+
}
|
| 249 |
+
|
| 250 |
+
return {
|
| 251 |
+
"uploaded_files": len(uploaded_files),
|
| 252 |
+
"status": doc_status
|
| 253 |
+
}
|
| 254 |
+
except Exception as e:
|
| 255 |
+
return JSONResponse(status_code=500, content={"error": str(e)})
|
| 256 |
+
|
| 257 |
+
# Health check endpoint (liveness probe with a static payload)
@app.get("/health")
async def health_check():
    """Simple health check"""
    return {"status": "healthy", "message": "ChatWithDoc API is running"}
|
| 262 |
+
|
| 263 |
+
# Mount the frontend directory as a static path.
# This should be after all API routes to ensure they are not overridden.
app.mount("/", StaticFiles(directory="frontend", html=True), name="frontend")

if __name__ == "__main__":
    # Local/dev entry point; port 7860 is the Hugging Face Spaces convention.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
|
pdfHandler.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_community.document_loaders import PyPDFLoader
|
| 2 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 3 |
+
from langchain.chat_models import init_chat_model
|
| 4 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 5 |
+
import faiss
|
| 6 |
+
from langchain_community.docstore.in_memory import InMemoryDocstore
|
| 7 |
+
from langchain_community.vectorstores import FAISS
|
| 8 |
+
import os
|
| 9 |
+
from langchain import hub
|
| 10 |
+
from dotenv import load_dotenv
|
| 11 |
+
from langgraph.graph import START, StateGraph
|
| 12 |
+
from typing import List, Dict, Any, Optional
|
| 13 |
+
from pydantic import BaseModel, Field
|
| 14 |
+
from langchain.docstore.document import Document
|
| 15 |
+
|
| 16 |
+
load_dotenv()
|
| 17 |
+
|
| 18 |
+
class State(BaseModel):
    # Shared state flowing through the LangGraph pipeline in PDFProcessor:
    # `question` is the user input, the retrieve node fills `context`,
    # and the generate node fills `answer`.
    question: str = Field(..., description="Type your question here")
    context: List[Document] = Field(
        default_factory=list,
        description="A list of Document objects",
    )
    answer: str = Field(default="", description="Answer will be here")
| 25 |
+
|
| 26 |
+
class PDFProcessor:
    """RAG pipeline for a single PDF: load -> chunk -> embed -> FAISS -> LangGraph Q&A."""

    def __init__(self):
        # Fail fast when the Gemini credential is absent.
        if not os.environ.get("GOOGLE_API_KEY"):
            raise ValueError("Google Gemini API key not found in environment variables")

        self.llm = init_chat_model("gemini-2.5-flash", model_provider="google_genai")
        self.embedding_model = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"},
        )
        self.prompt = hub.pull("rlm/rag-prompt")
        self.vector_store = None  # set by process_pdf()
        self.chunk_size = 1000
        self.chunk_overlap = 200

    def process_pdf(self, file_path: str) -> Dict[str, Any]:
        """
        Load a PDF, split it into overlapping chunks and index them in an
        in-memory FAISS store, replacing any previously indexed document.

        Args:
            file_path (str): Path to the PDF file

        Returns:
            Dict[str, Any]: {"status": "success", "num_pages", "num_chunks", ...}
            or {"status": "error", "message": ...}
        """
        try:
            print(f"Processing PDF file: {file_path}")
            loaded_pages = PyPDFLoader(file_path).load()

            splitter = RecursiveCharacterTextSplitter(
                chunk_size=self.chunk_size,
                chunk_overlap=self.chunk_overlap,
            )
            chunks = splitter.split_documents(loaded_pages)

            # Size the FAISS index from a probe embedding.
            dim = len(self.embedding_model.embed_query("test"))
            self.vector_store = FAISS(
                embedding_function=self.embedding_model,
                index=faiss.IndexFlatL2(dim),
                docstore=InMemoryDocstore(),
                index_to_docstore_id={},
            )
            self.vector_store.add_documents(documents=chunks)

            return {
                "status": "success",
                "message": "PDF processed successfully",
                "num_pages": len(loaded_pages),
                "num_chunks": len(chunks),
            }
        except Exception as e:
            return {
                "status": "error",
                "message": f"Error processing PDF: {str(e)}",
            }

    def query_response(self, query: str) -> Dict[str, Any]:
        """
        Answer *query* against the indexed PDF via a retrieve -> generate
        LangGraph pipeline.

        Args:
            query (str): The question to ask about the document

        Returns:
            Dict[str, Any]: Answer payload, or an error dict if no document
            has been processed yet.
        """
        if not self.vector_store:
            return {
                "status": "error",
                "message": "No document has been processed yet",
            }

        try:
            builder = StateGraph(State)

            # NOTE: node names must stay "retrieve"/"generate" — langgraph
            # derives them from the function names and the entry point below
            # references "retrieve".
            def retrieve(state: State):
                return {"context": self.vector_store.similarity_search(state.question)}

            def generate(state: State):
                joined = "\n\n".join(d.page_content for d in state.context)
                msgs = self.prompt.invoke({
                    "question": state.question,
                    "context": joined,
                })
                return {"answer": self.llm.invoke(msgs).content}

            compiled = builder.add_sequence([retrieve, generate]).set_entry_point("retrieve").compile()
            final_state = compiled.invoke({"question": query})

            return {
                "status": "success",
                "answer": final_state["answer"],
                "query": query,
            }
        except Exception as e:
            return {
                "status": "error",
                "message": f"Error querying document: {str(e)}",
            }
|
| 141 |
+
|
requirements.txt
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Core LangChain dependencies
|
| 2 |
+
langchain
|
| 3 |
+
langchain-community
|
| 4 |
+
langchain-core
|
| 5 |
+
langchain-text-splitters
|
| 6 |
+
langchain-google-genai
|
| 7 |
+
langgraph
grandalf
# Imported by pdfHandler/txtHandler but previously missing:
langchain-huggingface
python-dotenv
|
| 9 |
+
|
| 10 |
+
# FastAPI and web framework
|
| 11 |
+
fastapi
|
| 12 |
+
uvicorn[standard]
|
| 13 |
+
python-multipart
|
| 14 |
+
pydantic
|
| 15 |
+
|
| 16 |
+
# Document processing
|
| 17 |
+
PyPDF2
|
| 18 |
+
python-docx
|
| 19 |
+
docx2txt
|
| 20 |
+
unstructured
|
| 21 |
+
beautifulsoup4
|
| 22 |
+
requests
|
| 23 |
+
|
| 24 |
+
# Embeddings and vector stores - Fixed versions
|
| 25 |
+
sentence-transformers
|
| 26 |
+
faiss-cpu
|
| 27 |
+
numpy
|
| 28 |
+
torch
|
| 29 |
+
transformers
|
| 30 |
+
huggingface-hub
|
| 31 |
+
|
| 32 |
+
# Additional utilities
|
| 33 |
+
python-magic
|
| 34 |
+
python-magic-bin
|
| 35 |
+
pypdf
|
| 36 |
+
lxml
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
# Web scraping
|
| 41 |
+
requests
|
| 42 |
+
beautifulsoup4
|
task_manager.py
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pdfHandler import PDFProcessor
|
| 2 |
+
from docHandler import DocProcessor
|
| 3 |
+
from txtHandler import TextProcessor
|
| 4 |
+
from webHandler import WebProcessor
|
| 5 |
+
from typing import Dict, Any, List
|
| 6 |
+
|
| 7 |
+
class DocumentManager:
    """
    Routes uploaded files and URLs to type-specific processors and fans
    user queries out across every processed document.
    """

    # Maps an accepted MIME type to (processor class, name of its processing method).
    _HANDLERS = {
        "application/pdf": (PDFProcessor, "process_pdf"),
        "application/msword": (DocProcessor, "process_docx"),
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": (DocProcessor, "process_docx"),
        "text/plain": (TextProcessor, "process_text"),
    }

    def __init__(self):
        # Kept for backward compatibility with code that reaches into these
        # attributes directly; processing below creates a FRESH instance per
        # document instead of reusing them (a shared instance holds a single
        # vector store, so a second document of the same type would overwrite
        # the first one's index).
        self.pdf_processor = PDFProcessor()
        self.doc_processor = DocProcessor()
        self.txt_processor = TextProcessor()
        self.web_processor = WebProcessor()

        # Each entry: {"processor", "file_path", "content_type", "filename"}
        self.processed_documents: List[Dict[str, Any]] = []
        self.all_content = ""  # combined text for multi-document queries

    def process_document(self, file_path: str, content_type: str) -> Dict[str, Any]:
        """
        Process one file with the processor matching *content_type*.

        Returns the processor's result dict ({"status": ..., "message": ...}).
        """
        try:
            handler = self._HANDLERS.get(content_type)
            if handler is None:
                return {"status": "error", "message": "Unknown file type"}

            print(f"Processing file: {file_path} with content type: {content_type}")

            # Fresh processor per document so earlier documents of the same
            # type keep their own vector stores.
            processor_cls, method_name = handler
            processor = processor_cls()
            result = getattr(processor, method_name)(file_path)

            if result["status"] == "success":
                doc_info = {
                    "processor": processor,
                    "file_path": file_path,
                    "content_type": content_type,
                    # Normalize separators so Windows paths also yield a bare filename.
                    "filename": file_path.replace("\\", "/").split("/")[-1],
                }
                self.processed_documents.append(doc_info)

                # Accumulate raw text for processors that expose it; best-effort.
                try:
                    if hasattr(processor, "get_content"):
                        content = processor.get_content()
                        self.all_content += f"\n\n--- Document: {doc_info['filename']} ---\n{content}"
                except Exception as e:
                    print(f"Could not collect content for {doc_info['filename']}: {e}")

                print(f"Document added to collection. Total documents: {len(self.processed_documents)}")

            return result
        except Exception as e:
            return {"status": "error", "message": str(e)}

    def query_document(self, query: str) -> Dict[str, Any]:
        """Query every processed document and combine the per-document answers."""
        if not self.processed_documents:
            return {"status": "error", "message": "No documents processed"}

        print(f"Querying {len(self.processed_documents)} documents with question: {query}")

        try:
            all_responses = []

            for doc_info in self.processed_documents:
                processor = doc_info["processor"]
                # Guard against legacy entries whose filename still holds a Windows path.
                just_filename = doc_info["filename"].replace("\\", "/").split("/")[-1]

                # Query each document independently; one failure must not
                # abort the others.
                try:
                    response = processor.query_response(query)
                    if response.get("status") == "success":
                        answer = response.get("answer", "")
                        if answer and answer.strip():
                            all_responses.append(f"From {just_filename}:\n {answer}")
                except Exception as e:
                    print(f"Error querying {just_filename}: {e}")
                    continue

            if not all_responses:
                return {"status": "error", "message": "No relevant information found in any documents"}

            return {
                "status": "success",
                "answer": "\n\n".join(all_responses),
            }

        except Exception as e:
            # Fallback: answer from the most recently processed document.
            print(f"Multi-document query failed, using last document: {e}")
            return self.processed_documents[-1]["processor"].query_response(query)

    def clear_documents(self):
        """Clear all previously processed documents"""
        self.processed_documents = []
        self.all_content = ""
        print("All documents cleared - ready for new uploads")

    def process_url(self, url: str) -> Dict[str, Any]:
        """Fetch and index a web page, adding it to the document collection."""
        try:
            # Fresh processor per URL so earlier pages are not overwritten.
            processor = WebProcessor()
            result = processor.process_url(url)
            if result["status"] == "success":
                doc_info = {
                    "processor": processor,
                    "file_path": url,
                    "content_type": "text/html",
                    "filename": f"webpage_{url.split('/')[-1] or 'index'}",
                }
                self.processed_documents.append(doc_info)

                # Accumulate raw text for multi-document queries; best-effort.
                try:
                    if hasattr(processor, "get_content"):
                        content = processor.get_content()
                        self.all_content += f"\n\n--- Web Page: {url} ---\n{content}"
                except Exception as e:
                    print(f"Could not collect content for {url}: {e}")

                print(f"URL processed and added to collection: {url}")
            return result
        except Exception as e:
            return {"status": "error", "message": str(e)}

    def get_status(self) -> Dict[str, Any]:
        """Get current status of processed documents"""
        return {
            "total_documents": len(self.processed_documents),
            "document_types": list({doc["content_type"] for doc in self.processed_documents}),
            "filenames": [doc["filename"] for doc in self.processed_documents],
        }
|
txtHandler.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_community.document_loaders import TextLoader
|
| 2 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 3 |
+
from langchain.chat_models import init_chat_model
|
| 4 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 5 |
+
import faiss
|
| 6 |
+
from langchain_community.docstore.in_memory import InMemoryDocstore
|
| 7 |
+
from langchain_community.vectorstores import FAISS
|
| 8 |
+
import os
|
| 9 |
+
from langchain import hub
|
| 10 |
+
from dotenv import load_dotenv
|
| 11 |
+
from langgraph.graph import START, StateGraph
|
| 12 |
+
from typing import List, Dict, Any, Optional
|
| 13 |
+
from pydantic import BaseModel, Field
|
| 14 |
+
from langchain.docstore.document import Document
|
| 15 |
+
|
| 16 |
+
load_dotenv()
|
| 17 |
+
|
| 18 |
+
class State(BaseModel):
|
| 19 |
+
question: str = Field(..., description="Type your question here")
|
| 20 |
+
context: List[Document] = Field(
|
| 21 |
+
default_factory=list,
|
| 22 |
+
description="A list of Document objects",
|
| 23 |
+
)
|
| 24 |
+
answer: str = Field(default="", description="Answer will be here")
|
| 25 |
+
|
| 26 |
+
class TextProcessor:
    """RAG helper for a single plain-text file: load, chunk, embed into FAISS, answer via LangGraph."""

    def __init__(self):
        # Refuse to construct without the Gemini credential.
        if not os.environ.get("GOOGLE_API_KEY"):
            raise ValueError("Google Gemini API key not found in environment variables")

        self.llm = init_chat_model("gemini-2.5-flash", model_provider="google_genai")
        self.embedding_model = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"},
        )
        self.prompt = hub.pull("rlm/rag-prompt")
        self.vector_store = None  # populated by process_text()
        self.chunk_size = 1000
        self.chunk_overlap = 200

    def process_text(self, file_path: str) -> Dict[str, Any]:
        """
        Load a UTF-8 text file, split it into overlapping chunks, and index
        the chunks in an in-memory FAISS store.

        Args:
            file_path (str): Path to the text file

        Returns:
            Dict[str, Any]: {"status": "success", "num_pages", "num_chunks", ...}
            or {"status": "error", "message": ...}
        """
        try:
            docs = TextLoader(file_path, encoding='utf-8').load()

            splitter = RecursiveCharacterTextSplitter(
                chunk_size=self.chunk_size,
                chunk_overlap=self.chunk_overlap,
            )
            chunks = splitter.split_documents(docs)

            # Probe one embedding to discover the index dimensionality.
            dim = len(self.embedding_model.embed_query("test"))
            self.vector_store = FAISS(
                embedding_function=self.embedding_model,
                index=faiss.IndexFlatL2(dim),
                docstore=InMemoryDocstore(),
                index_to_docstore_id={},
            )
            self.vector_store.add_documents(documents=chunks)

            return {
                "status": "success",
                "message": "Text file processed successfully",
                "num_pages": len(docs),
                "num_chunks": len(chunks),
            }
        except Exception as e:
            return {
                "status": "error",
                "message": f"Error processing text file: {str(e)}",
            }

    def query_response(self, query: str) -> Dict[str, Any]:
        """
        Answer *query* against the indexed file via a retrieve -> generate
        LangGraph pipeline.

        Args:
            query (str): The question to ask about the document

        Returns:
            Dict[str, Any]: Answer payload, or an error dict if nothing has
            been processed yet.
        """
        if not self.vector_store:
            return {
                "status": "error",
                "message": "No document has been processed yet",
            }

        try:
            workflow = StateGraph(State)

            # Node names must stay "retrieve"/"generate": langgraph derives
            # them from the function names and set_entry_point references one.
            def retrieve(state: State):
                return {"context": self.vector_store.similarity_search(state.question)}

            def generate(state: State):
                merged = "\n\n".join(d.page_content for d in state.context)
                msgs = self.prompt.invoke({
                    "question": state.question,
                    "context": merged,
                })
                return {"answer": self.llm.invoke(msgs).content}

            compiled = workflow.add_sequence([retrieve, generate]).set_entry_point("retrieve").compile()
            final_state = compiled.invoke({"question": query})

            return {
                "status": "success",
                "answer": final_state["answer"],
                "query": query,
            }
        except Exception as e:
            return {
                "status": "error",
                "message": f"Error querying document: {str(e)}",
            }
|
| 141 |
+
|
webHandler.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
from bs4 import BeautifulSoup
|
| 3 |
+
from typing import Dict, Any
|
| 4 |
+
|
| 5 |
+
class WebProcessor:
    """Fetches a web page, strips boilerplate, and answers queries via keyword matching."""

    def __init__(self):
        # Extracted page text and its source URL; empty until process_url succeeds.
        self.content = ""
        self.url = ""

    def process_url(self, url: str) -> Dict[str, Any]:
        """Download *url*, extract its readable text, and cache it for querying."""
        try:
            # Browser-like headers: some sites refuse the default requests UA.
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.5',
                'Accept-Encoding': 'gzip, deflate',
                'Connection': 'keep-alive',
            }

            page = requests.get(url, headers=headers, timeout=10)
            page.raise_for_status()

            soup = BeautifulSoup(page.content, 'html.parser')

            # Drop non-content elements before extracting text.
            for tag in soup(['script', 'style', 'nav', 'header', 'footer', 'aside', 'advertisement']):
                tag.decompose()

            raw_text = soup.get_text()

            # Normalize whitespace: strip each line, split on spaces, and
            # re-join the non-empty pieces with single spaces.
            stripped_lines = (ln.strip() for ln in raw_text.splitlines())
            pieces = (part.strip() for ln in stripped_lines for part in ln.split(" "))
            cleaned = ' '.join(piece for piece in pieces if piece)

            if not cleaned.strip():
                return {"status": "error", "message": "No text content could be extracted from the webpage"}

            title_tag = soup.find('title')
            page_title = title_tag.get_text().strip() if title_tag else "Untitled"

            self.content = cleaned.strip()
            self.url = url

            word_total = len(cleaned.split())
            return {
                "status": "success",
                "message": "Web page processed successfully",
                "title": page_title,
                "num_pages": 1,
                "num_chunks": word_total // 100 + 1,
                "word_count": word_total,
            }

        except requests.exceptions.RequestException as e:
            return {"status": "error", "message": f"Failed to fetch webpage: {str(e)}"}
        except Exception as e:
            return {"status": "error", "message": f"Error processing webpage: {str(e)}"}

    def query_response(self, query: str) -> Dict[str, Any]:
        """Answer a query about the cached web content via keyword search."""
        if not self.content:
            return {"status": "error", "message": "No web content available"}

        try:
            return {
                "status": "success",
                "answer": self._search_content(query, self.content),
            }
        except Exception as e:
            return {"status": "error", "message": str(e)}

    def get_content(self) -> str:
        """Return the cached page text (empty string if nothing processed)."""
        return self.content

    def _search_content(self, query: str, content: str) -> str:
        """Rank sentences by how many query words they contain; return the top three."""
        terms = query.lower().split()

        # Naive sentence segmentation on periods, dropping tiny fragments.
        candidates = [frag.strip() for frag in content.split('.') if len(frag.strip()) > 10]

        scored = []
        for sentence in candidates:
            lowered = sentence.lower()
            hits = sum(1 for t in terms if t in lowered)
            if hits > 0:
                scored.append((sentence, hits))

        if not scored:
            return "I couldn't find information related to your query on this webpage."

        # Stable sort keeps document order among equal scores.
        best = sorted(scored, key=lambda pair: pair[1], reverse=True)[:3]
        return ". ".join(s for s, _ in best)
|