Spaces:

anabaslabs
/

ARC

Running

App Files Files Community

github-actions[bot] committed on 26 days ago

Commit

f6a4c20

1 Parent(s): b623276

Deploy from GitHub Actions: df12f0dffbbb25b28e353981e621b4fe6afd80f0

Browse files

Files changed (23) hide show

.dockerignore +170 -0
.env.example +10 -0
.gitattributes +0 -35
.gitignore +9 -0
Dockerfile +29 -0
README.md +35 -4
app/config.py +54 -0
app/main.py +52 -0
app/rag/chunker.py +14 -0
app/rag/cleaner.py +37 -0
app/rag/embedder.py +11 -0
app/rag/loader.py +76 -0
app/rag/pipeline.py +43 -0
app/rag/vectorstore.py +73 -0
app/routes/ask.py +33 -0
app/routes/chats.py +34 -0
app/routes/clear.py +11 -0
app/routes/delete.py +11 -0
app/routes/upload.py +59 -0
app/static/favicon.ico +0 -0
package.json +9 -0
requirements.txt +23 -0
vercel.json +14 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,170 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+### Python Patch ###
+# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
+poetry.toml
+# ruff
+.ruff_cache/
+# LSP config files
+pyrightconfig.json

.env.example ADDED Viewed

	@@ -0,0 +1,10 @@

+# API
+ENV=development # `production` or `development`
+CORS_ORIGINS=["*"]
+# Gemini
+GOOGLE_API_KEY=your_gemini_api_key
+# Pinecone
+PINECONE_API_KEY=your_pinecone_api_key
+PINECONE_INDEX_NAME=your_pinecone_index_name

.gitattributes DELETED Viewed

@@ -1,35 +0,0 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,9 @@

+.env
+data/
+data
+__pycache__/
+__pycache__
+*venv/
+*venv
+docs/

Dockerfile ADDED Viewed

	@@ -0,0 +1,29 @@

+FROM python:3.12-slim
+RUN apt-get update && apt-get install -y --no-install-recommends \
+  ca-certificates \
+  libmagic1 \
+  && rm -rf /var/lib/apt/lists/*
+COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
+ENV ENV=production \
+  PORT=7860 \
+  PYTHONUNBUFFERED=1 \
+  PYTHONDONTWRITEBYTECODE=1
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+  PATH=/home/user/.local/bin:$PATH
+WORKDIR $HOME/app
+COPY --chown=user requirements.txt .
+RUN uv pip install --no-cache --system -r requirements.txt
+COPY --chown=user app app
+EXPOSE ${PORT}
+CMD ["sh", "-c", "uvicorn app.main:app --host 0.0.0.0 --port ${PORT}"]

README.md CHANGED Viewed

@@ -1,11 +1,42 @@
 ---
 title: ARC
-emoji: 🌍
-colorFrom: red
-colorTo: blue
 sdk: docker
 pinned: false
 short_description: Augmented Retrieval Chatbot
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: ARC
+emoji: ⚡
+colorFrom: green
+colorTo: yellow
 sdk: docker
+app_port: 7860
 pinned: false
 short_description: Augmented Retrieval Chatbot
 ---
+## Backend
+### Run Server
+```bash
+cd backend
+```
+```bash
+uv venv .venv
+```
+```bash
+.venv\Scripts\activate
+```
+```bash
+uv pip install -r requirements.txt
+```
+```bash
+uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
+```
+---
+### Clean Cache
+```powershell
+Get-ChildItem -Path . -Include __pycache__ -Recurse -Force | Remove-Item -Recurse -Force
+```

app/config.py ADDED Viewed

	@@ -0,0 +1,54 @@

+import os
+import json
+from dotenv import load_dotenv
+load_dotenv()
+ENV = os.getenv("ENV", "development")
+# CORS_ORIGINS is expected to be a JSON array (e.g. ["https://a.example"]). A plain
+# comma-separated value is accepted as well so that a hand-typed setting such as
+# CORS_ORIGINS=https://a.example does not crash the app at import time.
+CORS_ORIGINS_STR = os.getenv("CORS_ORIGINS", '["*"]')
+try:
+    CORS_ORIGINS = json.loads(CORS_ORIGINS_STR)
+except json.JSONDecodeError:
+    CORS_ORIGINS = [o.strip() for o in CORS_ORIGINS_STR.split(",") if o.strip()]
+# A bare JSON string is wrapped so importers always receive a list of origins.
+if isinstance(CORS_ORIGINS, str):
+    CORS_ORIGINS = [CORS_ORIGINS]
+APP_NAME = "ARC API"
+APP_VERSION = "2.0.0"
+GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
+PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
+PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME")
+EMBED_MODEL = "models/gemini-embedding-001"
+CHAT_MODEL = "gemini-2.5-flash-lite"
+TOP_K = 10
+CHUNK_SIZE = 1500
+CHUNK_OVERLAP = 200
+UPLOAD_BATCH_SIZE = 100
+MAX_FILE_COUNT = 6
+MAX_FILE_SIZE = 5 * 1024 * 1024
+UPLOAD_DIR = "data/uploads"
+ALLOWED_TYPES = {
+    "pdf",
+    "docx",
+    "xlsx",
+    "csv",
+    "pptx",
+    "txt",
+    "md",
+    "json",
+}
+PROMPT = (
+    "You are ARC, a helpful document assistant. "
+    "Answer the question based ONLY on the provided context. "
+    "If the context contains math or LaTeX, preserve them using $ for inline and $$ for display math. "
+    "If you cannot answer from the context, say so honestly. "
+    "Context: {context} Question: {question}"
+)
+CREATORS = [
+    {"name": "Krishnendu Das", "url": "https://itskdhere.com"},
+    {"name": "Saptarshi Roy", "url": "https://hirishi.in"}
+]

app/main.py ADDED Viewed

	@@ -0,0 +1,52 @@

+from app.routes.ask import router as ask_router
+from app.routes.delete import router as delete_router
+from app.routes.clear import router as clear_router
+from app.routes.chats import router as chats_router
+from app.routes.upload import router as upload_router
+from app.config import APP_NAME, APP_VERSION, CORS_ORIGINS, ENV, CREATORS
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.staticfiles import StaticFiles
+from fastapi.responses import FileResponse
+import os
+app = FastAPI(
+    title=APP_NAME,
+    version=APP_VERSION,
+    description="Augmented Retrieval Chatbot - API",
+    docs_url=None if ENV == "production" else "/docs",
+    redoc_url=None if ENV == "production" else "/redoc",
+    openapi_url=None if ENV == "production" else "/openapi.json",
+)
+# FastAPI's CORS guidance says allow_credentials=True must not be combined with a
+# wildcard origin, so credentials are only enabled when explicit origins are configured.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=CORS_ORIGINS,
+    allow_credentials="*" not in CORS_ORIGINS,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+app.include_router(ask_router)
+app.include_router(upload_router)
+app.include_router(delete_router)
+app.include_router(clear_router)
+app.include_router(chats_router)
+@app.get("/")
+async def root():
+    """Return basic service metadata: name, version, status and creators."""
+    info = {"name": APP_NAME, "version": APP_VERSION}
+    info["status"] = "OK"
+    info["creators"] = CREATORS
+    return info
+app.mount("/static", StaticFiles(directory="app/static"), name="static")
+@app.get('/favicon.ico', include_in_schema=False)
+async def favicon():
+    """Serve the favicon stored under app/static (path is relative to the working directory)."""
+    icon_path = os.path.join("app", "static", "favicon.ico")
+    return FileResponse(icon_path)

app/rag/chunker.py ADDED Viewed

	@@ -0,0 +1,14 @@

+from app.config import CHUNK_OVERLAP, CHUNK_SIZE
+from langchain_core.documents import Document
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+text_splitter = RecursiveCharacterTextSplitter(
+    chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP
+)
+def chunk_docs(docs: list[Document]) -> list[Document]:
+    """Split ``docs`` into chunks using the module-level ``text_splitter``."""
+    chunks = text_splitter.split_documents(docs)
+    return chunks
+# https://docs.langchain.com/oss/python/integrations/splitters

app/rag/cleaner.py ADDED Viewed

	@@ -0,0 +1,37 @@

+import re
+def clean_text(text: str) -> str:
+    """Normalize extracted text.
+    Drops NUL characters, caps runs of newlines at two, collapses runs of spaces
+    and tabs into one space, and strips both ends. Falsy input returns "".
+    """
+    if text:
+        without_nul = text.replace("\x00", "")
+        fewer_breaks = re.sub(r"\n{3,}", "\n\n", without_nul)
+        return re.sub(r"[ \t]+", " ", fewer_breaks).strip()
+    return ""
+# Environments whose \begin/\end wrapper is replaced by $$...$$.
+_DISPLAY_ENVS = r"equation|align|gather|displaymath|eqnarray|multline|flalign|split"
+# Environments that are kept as-is and wrapped in $$...$$ when found outside math.
+_MATRIX_ENVS = r"matrix|pmatrix|bmatrix|vmatrix|Bmatrix|cases|array"
+# All delimiters are matched in ONE left-to-right pass. A matrix that sits inside
+# \[...\], \(...\), $$...$$ or a display environment is consumed as part of that
+# outer span, so it is never wrapped in a second pair of $$.
+_LATEX_MATH = re.compile(
+    r"\\\[(?P<bracket>.*?)\\\]"
+    r"|\\\((?P<paren>.*?)\\\)"
+    r"|(?P<dollars>\$\$.*?\$\$)"
+    rf"|\\begin{{(?P<denv>{_DISPLAY_ENVS})\*?}}(?P<dbody>.*?)\\end{{(?P=denv)\*?}}"
+    rf"|(?<!\$)\\begin{{(?P<menv>{_MATRIX_ENVS})\*?}}(?P<mbody>.*?)\\end{{(?P=menv)\*?}}",
+    flags=re.DOTALL,
+)
+def _to_dollar_math(match: re.Match) -> str:
+    """Rewrite one matched LaTeX math span using $ / $$ delimiters."""
+    if match["bracket"] is not None:
+        return f"$${match['bracket']}$$"
+    if match["paren"] is not None:
+        return f"${match['paren']}$"
+    if match["dollars"] is not None:
+        # Already dollar-delimited display math: leave untouched.
+        return match["dollars"]
+    if match["denv"] is not None:
+        return f"$${match['dbody']}$$"
+    # Bare matrix-like environment: keep it (without any '*') and wrap it.
+    env = match["menv"]
+    return f"$$\\begin{{{env}}}{match['mbody']}\\end{{{env}}}$$"
+def process_latex(text: str) -> str:
+    """Convert LaTeX math delimiters to dollar notation.
+    ``\\[..\\]`` and display environments become ``$$..$$``, ``\\(..\\)`` becomes
+    ``$..$``, and matrix-like environments found outside math are wrapped in
+    ``$$..$$``. Falsy input is returned unchanged. A matrix directly preceded by
+    ``$`` is left alone; one inside single-dollar inline math with other text
+    before it is not detected.
+    """
+    if not text:
+        return text
+    converted = _LATEX_MATH.sub(_to_dollar_math, text)
+    # Collapse accidental runs of three or more dollars into a single $$.
+    return re.sub(r"\${3,}", "$$", converted)

app/rag/embedder.py ADDED Viewed

	@@ -0,0 +1,11 @@

+from app.config import EMBED_MODEL, GOOGLE_API_KEY
+from langchain_google_genai import GoogleGenerativeAIEmbeddings
+embeddings = GoogleGenerativeAIEmbeddings(
+    model=EMBED_MODEL,
+    google_api_key=GOOGLE_API_KEY,
+    output_dimensionality=768,
+)
+# https://python.langchain.com/docs/integrations/text_embedding/google_generative_ai/

app/rag/loader.py ADDED Viewed

	@@ -0,0 +1,76 @@

+from langchain_community.document_loaders import (
+    CSVLoader,
+    Docx2txtLoader,
+    JSONLoader,
+    PDFPlumberLoader,
+    TextLoader,
+    UnstructuredExcelLoader,
+    UnstructuredMarkdownLoader,
+    UnstructuredPowerPointLoader,
+)
+from langchain_core.documents import Document
+# PDF
+# https://python.langchain.com/docs/integrations/document_loaders/pdfplumber
+def read_pdf(path: str) -> list[Document]:
+    """Load the PDF at ``path`` with PDFPlumberLoader."""
+    return PDFPlumberLoader(path).load()
+# TXT
+# https://python.langchain.com/api_reference/community/document_loaders/langchain_community.document_loaders.text.TextLoader
+def read_txt(path: str) -> list[Document]:
+    """Load the text file at ``path`` with TextLoader, decoding it as UTF-8."""
+    return TextLoader(path, encoding="utf-8").load()
+# CSV
+# https://python.langchain.com/docs/integrations/document_loaders/csv
+def read_csv(path: str) -> list[Document]:
+    """Load the CSV file at ``path`` with CSVLoader."""
+    return CSVLoader(file_path=path).load()
+# MD
+# https://python.langchain.com/docs/integrations/document_loaders/unstructured_file/
+def read_md(path: str) -> list[Document]:
+    """Load the Markdown file at ``path`` with UnstructuredMarkdownLoader."""
+    return UnstructuredMarkdownLoader(path).load()
+# JSON
+# https://python.langchain.com/docs/integrations/document_loaders/json
+def read_json(path: str) -> list[Document]:
+    """Load the JSON file at ``path`` with JSONLoader, selecting the whole document (jq ``.``)."""
+    json_loader = JSONLoader(file_path=path, jq_schema=".", text_content=False)
+    return json_loader.load()
+# DOCX
+# https://python.langchain.com/docs/integrations/document_loaders/microsoft_word
+def read_docx(path: str) -> list[Document]:
+    """Load the Word document at ``path`` with Docx2txtLoader."""
+    return Docx2txtLoader(path).load()
+# XLSX
+# https://python.langchain.com/docs/integrations/document_loaders/microsoft_excel
+def read_xlsx(path: str) -> list[Document]:
+    """Load the Excel workbook at ``path`` with UnstructuredExcelLoader in "elements" mode."""
+    return UnstructuredExcelLoader(path, mode="elements").load()
+# PPTX
+# https://python.langchain.com/docs/integrations/document_loaders/microsoft_powerpoint
+def read_pptx(path: str) -> list[Document]:
+    """Load the PowerPoint file at ``path`` with UnstructuredPowerPointLoader in "elements" mode."""
+    return UnstructuredPowerPointLoader(path, mode="elements").load()

app/rag/pipeline.py ADDED Viewed

	@@ -0,0 +1,43 @@

+from app.rag.chunker import chunk_docs
+from app.rag.cleaner import clean_text, process_latex
+from app.rag.loader import (
+    read_csv,
+    read_docx,
+    read_json,
+    read_md,
+    read_pdf,
+    read_pptx,
+    read_txt,
+    read_xlsx,
+)
+from app.rag.vectorstore import add_documents
+from langchain_core.documents import Document
+LOADERS = {
+    "pdf": read_pdf,
+    "txt": read_txt,
+    "csv": read_csv,
+    "md": read_md,
+    "json": read_json,
+    "docx": read_docx,
+    "xlsx": read_xlsx,
+    "pptx": read_pptx,
+}
+def _clean_docs(docs: list[Document]) -> list[Document]:
+    """Clean each document's text in place (clean_text, then process_latex) and return the same list."""
+    for document in docs:
+        document.page_content = process_latex(clean_text(document.page_content))
+    return docs
+def process_file(path: str, ext: str, session_id: str = "default_index") -> int:
+    """Load, clean, chunk and index one file; return how many chunks were produced.
+    Raises ValueError when ``ext`` (case-insensitive) has no entry in LOADERS.
+    """
+    key = ext.lower()
+    if key not in LOADERS:
+        raise ValueError(f"Unsupported file type: .{ext}")
+    chunks = chunk_docs(_clean_docs(LOADERS[key](path)))
+    add_documents(chunks, session_id=session_id)
+    return len(chunks)

app/rag/vectorstore.py ADDED Viewed

	@@ -0,0 +1,73 @@

+import threading
+import time
+from app.config import PINECONE_API_KEY, PINECONE_INDEX_NAME, UPLOAD_BATCH_SIZE
+from app.rag.embedder import embeddings
+from langchain_core.documents import Document
+from langchain_pinecone import PineconeVectorStore
+from pinecone import Pinecone
+_pinecone_index = None
+_pinecone_lock = threading.Lock()
+def _get_index():
+    """Return the module-wide Pinecone index handle, creating it on first use."""
+    global _pinecone_index
+    if _pinecone_index is not None:
+        return _pinecone_index
+    with _pinecone_lock:
+        # Checked again under the lock: another thread may have created it meanwhile.
+        if _pinecone_index is None:
+            client = Pinecone(api_key=PINECONE_API_KEY)
+            _pinecone_index = client.Index(PINECONE_INDEX_NAME)
+    return _pinecone_index
+def get_vectorstore(session_id: str = "default_index") -> PineconeVectorStore:
+    """Build a PineconeVectorStore that uses ``session_id`` as its namespace."""
+    store = PineconeVectorStore(
+        index_name=PINECONE_INDEX_NAME,
+        embedding=embeddings,
+        pinecone_api_key=PINECONE_API_KEY,
+        namespace=session_id,
+    )
+    return store
+def add_documents(chunks: list[Document], session_id: str = "default_index") -> None:
+    """Add ``chunks`` to the session's vector store in batches of UPLOAD_BATCH_SIZE.
+    Raises ValueError when ``chunks`` is empty. Sleeps 0.5 s between batches
+    (not after the last one).
+    """
+    if not chunks:
+        raise ValueError("No text could be extracted from the file.")
+    store = get_vectorstore(session_id)
+    total = len(chunks)
+    for start in range(0, total, UPLOAD_BATCH_SIZE):
+        stop = start + UPLOAD_BATCH_SIZE
+        store.add_documents(chunks[start:stop])
+        if stop < total:
+            time.sleep(0.5)
+def delete_vectorstore(session_id: str) -> bool:
+    """Delete every vector in the ``session_id`` namespace.
+    Returns True on success; any exception is printed and reported as False.
+    """
+    try:
+        _get_index().delete(delete_all=True, namespace=session_id)
+    except Exception as exc:
+        print(f"delete_vectorstore: failed to delete namespace '{session_id}': {exc}")
+        return False
+    return True
+def delete_all_vectorstores() -> bool:
+    """Delete every namespace listed by the index stats; True only if all deletions succeed.
+    Errors are printed rather than raised: a namespace that cannot be deleted is
+    recorded and the loop continues, and any other error also yields False.
+    """
+    try:
+        index = _get_index()
+        namespaces = list(index.describe_index_stats().namespaces.keys())
+        failed: list[str] = []
+        for ns in namespaces:
+            try:
+                index.delete(delete_all=True, namespace=ns)
+            except Exception as e:
+                print(f"Failed to delete namespace '{ns}': {e}")
+                failed.append(ns)
+        if not failed:
+            return True
+        print(f"delete_all_vectorstores: {len(failed)}/{len(namespaces)} namespaces failed: {failed}")
+        return False
+    except Exception as e:
+        print(f"delete_all_vectorstores: unexpected error: {e}")
+        return False

app/routes/ask.py ADDED Viewed

	@@ -0,0 +1,33 @@

+from app.config import CHAT_MODEL, GOOGLE_API_KEY, PROMPT, TOP_K
+from app.rag.vectorstore import get_vectorstore
+from fastapi import APIRouter, HTTPException
+from langchain_core.messages import HumanMessage
+from langchain_google_genai import ChatGoogleGenerativeAI
+from pydantic import BaseModel
+router = APIRouter()
+llm = ChatGoogleGenerativeAI(model=CHAT_MODEL, google_api_key=GOOGLE_API_KEY)
+# Request body for POST /ask.
+class AskRequest(BaseModel):
+    # The user's question; also used as the similarity-search query.
+    question: str
+    # Vector-store namespace to search; "default_index" when omitted.
+    session_id: str = "default_index"
+# Response body for POST /ask.
+class AskResponse(BaseModel):
+    # The chat model's reply content, converted to a string.
+    answer: str
+@router.post("/ask")
+async def ask(body: AskRequest) -> AskResponse:
+    """Answer a question from the documents indexed under the request's session.
+    Retrieves the TOP_K most similar chunks from the session namespace, fills
+    PROMPT with them and the question, and returns the chat model's reply.
+    Raises HTTPException(400) when the search returns no documents.
+    """
+    # Imported locally so the handler is self-contained; fastapi is already a dependency.
+    from fastapi.concurrency import run_in_threadpool
+    def _retrieve_and_answer() -> str:
+        store = get_vectorstore(body.session_id)
+        docs = store.similarity_search(body.question, k=TOP_K)
+        if not docs:
+            raise HTTPException(400, "No documents found for this session.")
+        context = "\n\n".join(d.page_content for d in docs)
+        prompt = PROMPT.format(context=context, question=body.question)
+        response = llm.invoke([HumanMessage(content=prompt)])
+        return str(response.content)
+    # The vector search and the LLM call are blocking network requests; running them
+    # in the threadpool keeps the event loop free to serve other requests meanwhile.
+    return AskResponse(answer=await run_in_threadpool(_retrieve_and_answer))

app/routes/chats.py ADDED Viewed

	@@ -0,0 +1,34 @@

+import re
+from datetime import datetime
+from app.rag.vectorstore import _get_index
+from fastapi import APIRouter, HTTPException
+router = APIRouter()
+@router.get("/chats")
+async def get_chats() -> dict:
+    """List chat sessions derived from the index's all-digit namespaces.
+    Each such namespace is read as a millisecond timestamp and turned into an
+    entry with ``id``, ``title`` and ``date``; entries are sorted newest first.
+    Namespaces that are not all digits, or whose timestamp cannot be converted
+    to a date, are skipped. Raises HTTPException(500) if the stats cannot be fetched.
+    """
+    # Imported locally so the handler is self-contained; fastapi is already a dependency.
+    from fastapi.concurrency import run_in_threadpool
+    try:
+        index = _get_index()
+        # describe_index_stats is a blocking call; keep it off the event loop.
+        stats = await run_in_threadpool(index.describe_index_stats)
+        chats = []
+        for ns in list(stats.namespaces.keys()):
+            if not re.fullmatch(r'\d+', ns):
+                continue
+            try:
+                dt = datetime.fromtimestamp(int(ns) / 1000.0)
+            except (OverflowError, OSError, ValueError):
+                # An out-of-range number must not break the listing for every other chat.
+                continue
+            chats.append({
+                "id": ns,
+                "title": f"Analysis {dt.strftime('%H:%M:%S')}",
+                "date": dt.strftime('%Y-%m-%d')
+            })
+        chats.sort(key=lambda x: int(x["id"]), reverse=True)
+        return {"chats": chats}
+    except Exception as e:
+        print(f"Error fetching chats: {e}")
+        raise HTTPException(500, "Failed to fetch chats from Pinecone.") from e

app/routes/clear.py ADDED Viewed

	@@ -0,0 +1,11 @@

+from app.rag.vectorstore import delete_all_vectorstores
+from fastapi import APIRouter, HTTPException
+router = APIRouter()
+@router.delete("/clear")
+async def clear_index() -> dict:
+    """Delete every namespace in the vector index.
+    Raises HTTPException(500) when delete_all_vectorstores reports failure.
+    """
+    # Imported locally so the handler is self-contained; fastapi is already a dependency.
+    from fastapi.concurrency import run_in_threadpool
+    # The deletion makes blocking network calls; keep them off the event loop.
+    cleared = await run_in_threadpool(delete_all_vectorstores)
+    if not cleared:
+        raise HTTPException(500, "Failed to clear the vector store.")
+    return {"message": "All vector stores cleared."}

app/routes/delete.py ADDED Viewed

	@@ -0,0 +1,11 @@

+from app.rag.vectorstore import delete_vectorstore
+from fastapi import APIRouter, HTTPException
+router = APIRouter()
+@router.delete("/delete/{session_id}")
+async def delete_specific_chat(session_id: str) -> dict:
+    """Delete the vector store namespace for ``session_id``.
+    Raises HTTPException(404) when delete_vectorstore returns False. That helper
+    returns False for any exception, so the 404 also covers backend errors.
+    """
+    # Imported locally so the handler is self-contained; fastapi is already a dependency.
+    from fastapi.concurrency import run_in_threadpool
+    # The deletion is a blocking network call; keep it off the event loop.
+    deleted = await run_in_threadpool(delete_vectorstore, session_id)
+    if not deleted:
+        raise HTTPException(404, f"No vector store found for session: {session_id}")
+    return {"message": f"Vector store for session {session_id} deleted."}

app/routes/upload.py ADDED Viewed

	@@ -0,0 +1,59 @@

+import os
+from app.config import ALLOWED_TYPES, MAX_FILE_COUNT, MAX_FILE_SIZE, UPLOAD_DIR
+from app.rag.pipeline import process_file
+from fastapi import APIRouter, File, Form, HTTPException, UploadFile
+router = APIRouter()
+def _ingest_upload(content: bytes, safe_name: str, ext: str, session_id: str) -> int:
+    """Write one upload into a private temporary directory, index it, and return its chunk count."""
+    import tempfile
+    # A per-upload directory stops concurrent requests that send the same filename
+    # from overwriting or deleting each other's file; it is removed on exit.
+    with tempfile.TemporaryDirectory(dir=UPLOAD_DIR) as workdir:
+        path = os.path.join(workdir, safe_name)
+        with open(path, "wb") as f:
+            f.write(content)
+        return process_file(path, ext, session_id=session_id)
+@router.post("/upload")
+async def upload_files(files: list[UploadFile] = File(...), session_id: str = Form(...)) -> dict:
+    """Index the uploaded files under ``session_id``.
+    Each file is validated (name, extension in ALLOWED_TYPES, size up to
+    MAX_FILE_SIZE) and then processed; per-file failures are collected in
+    ``errors`` instead of aborting the request.
+    Raises HTTPException(400) when more than MAX_FILE_COUNT files are sent and
+    HTTPException(422) when no file succeeded and at least one failed.
+    """
+    # Imported locally so the handler is self-contained; fastapi is already a dependency.
+    from fastapi.concurrency import run_in_threadpool
+    if len(files) > MAX_FILE_COUNT:
+        raise HTTPException(400, f"Maximum {MAX_FILE_COUNT} files allowed")
+    results = []
+    errors = []
+    total_chunks = 0
+    os.makedirs(UPLOAD_DIR, exist_ok=True)
+    for file in files:
+        original_name = file.filename or "upload.bin"
+        safe_name = os.path.basename(original_name)
+        if not safe_name:
+            errors.append({"source": original_name, "error": "Invalid filename"})
+            continue
+        # Only text after a real dot counts as an extension, so a file literally
+        # named "pdf" is not mistaken for a PDF.
+        _, dot, suffix = safe_name.rpartition(".")
+        ext = suffix.lower() if dot else ""
+        if ext not in ALLOWED_TYPES:
+            errors.append({"source": original_name, "error": f"Unsupported file type: .{ext}"})
+            continue
+        # Read at most one byte past the limit: enough to detect an oversized file
+        # without loading all of it into memory.
+        content = await file.read(MAX_FILE_SIZE + 1)
+        if len(content) > MAX_FILE_SIZE:
+            errors.append({"source": original_name, "error": "File too large"})
+            continue
+        try:
+            # Parsing, embedding and indexing are blocking; keep them off the event loop.
+            chunks = await run_in_threadpool(_ingest_upload, content, safe_name, ext, session_id)
+        except Exception as e:
+            errors.append({"source": original_name, "error": str(e)})
+        else:
+            total_chunks += chunks
+            results.append({"source": original_name, "chunks": chunks})
+    if not results and errors:
+        raise HTTPException(422, {"message": "All files failed to process", "errors": errors})
+    return {
+        "total_files": len(files),
+        "total_chunks": total_chunks,
+        "details": results,
+        "errors": errors,
+    }

app/static/favicon.ico ADDED Viewed

package.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "name": "arc-backend",
+  "version": "2.0.0",
+  "private": true,
+  "scripts": {
+    "dev": ".venv\\Scripts\\activate && uvicorn app.main:app --reload --host 0.0.0.0 --port 8000",
+    "start": "uvicorn app.main:app --host 0.0.0.0 --port 8000"
+  }
+}

requirements.txt ADDED Viewed

	@@ -0,0 +1,23 @@

+fastapi
+uvicorn[standard]
+python-multipart
+langchain-community
+langchain-core
+langchain-text-splitters
+langchain-google-genai
+pinecone-client
+langchain-pinecone
+google-generativeai
+pdfplumber
+docx2txt
+openpyxl
+python-pptx
+unstructured
+markdown
+jq
+python-dotenv

vercel.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "builds": [
+    {
+      "src": "app/main.py",
+      "use": "@vercel/python"
+    }
+  ],
+  "routes": [
+    {
+      "src": "/(.*)",
+      "dest": "app/main.py"
+    }
+  ]
+}