Spaces:

mayankchugh-learning
/

Document-Audit-RAG

Sleeping

App Files Files Community

Mayank Chugh committed 29 days ago

Commit

bdfb32d

1 Parent(s): d19142f

Implement Milestone 8 by adding new endpoints for URL ingestion and collection management. Introduce `httpx` as a dependency for handling URL downloads, and enhance the API with endpoints for listing and deleting collections. Update request and response models to support new functionalities, and refactor existing routes for improved clarity and organization.

Browse files

Files changed (11) hide show

api/routes/audit.py +10 -7
api/routes/ingest.py +175 -1
api/routes/query.py +83 -21
models/requests.py +36 -3
models/responses.py +16 -0
pyproject.toml +1 -0
rag/retriever.py +42 -0
rag/vector_store.py +14 -1
requirements.txt +1 -0
storage/audit_store.py +2 -1
uv.lock +6 -3

api/routes/audit.py CHANGED Viewed

@@ -7,16 +7,19 @@ from models.requests import AuditListParams
 from models.responses import AuditDetailResponse, AuditEvent, AuditListResponse
 from storage.audit_store import get_audit_event, list_audit_events
 def _audit_list_params(
     limit: Annotated[int, Query(ge=1, le=100)] = 10,
     offset: Annotated[int, Query(ge=0)] = 0,
 ) -> AuditListParams:
     return AuditListParams(limit=limit, offset=offset)
-router = APIRouter(tags=["audit"])
-@router.get("/audit", response_model=AuditListResponse)
-async def audit_list(
     params: Annotated[AuditListParams, Depends(_audit_list_params)],
 ) -> AuditListResponse:
     settings = get_settings()
@@ -29,14 +32,14 @@ async def audit_list(
     )
-@router.get("/audit/{event_id}", response_model=AuditDetailResponse)
-async def audit_detail(event_id: str) -> AuditDetailResponse:
     settings = get_settings()
-    event = await get_audit_event(settings.audit_db_path, event_id)
     if event is None:
         raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Audit event not found.")
     return AuditDetailResponse(
         status="success",
         message="Audit event retrieved.",
         event=event,
-    )

 from models.responses import AuditDetailResponse, AuditEvent, AuditListResponse
 from storage.audit_store import get_audit_event, list_audit_events
 def _audit_list_params(
     limit: Annotated[int, Query(ge=1, le=100)] = 10,
     offset: Annotated[int, Query(ge=0)] = 0,
 ) -> AuditListParams:
     """Build ``AuditListParams`` from the ``limit`` and ``offset`` query parameters.

     ``limit`` is constrained to 1..100 (default 10) and ``offset`` to >= 0
     (default 0) by the ``Query`` declarations above.
     """
     return AuditListParams(limit=limit, offset=offset)
+router = APIRouter(prefix="/audit", tags=["audit"])
+@router.get("/logs", response_model=AuditListResponse)
+async def audit_logs(
     params: Annotated[AuditListParams, Depends(_audit_list_params)],
 ) -> AuditListResponse:
     settings = get_settings()
     )
+@router.get("/logs/{query_id}", response_model=AuditDetailResponse)
+async def audit_log_detail(query_id: str) -> AuditDetailResponse:
     settings = get_settings()
+    event = await get_audit_event(settings.audit_db_path, query_id)
     if event is None:
         raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Audit event not found.")
     return AuditDetailResponse(
         status="success",
         message="Audit event retrieved.",
         event=event,
+    )

api/routes/ingest.py CHANGED Viewed

@@ -1,11 +1,20 @@
 from pathlib import Path
 from tempfile import NamedTemporaryFile
 from typing import Annotated
 from fastapi import APIRouter, BackgroundTasks, File, Form, HTTPException, UploadFile, status
 from api.config import get_settings
-from models.responses import IngestUploadResponse
 from storage.job_store import create_ingest_job
 from workers.ingest_worker import run_ingest_job
@@ -13,6 +22,13 @@ router = APIRouter(prefix="/ingest", tags=["ingest"])
 _SUPPORTED_EXTENSIONS = frozenset({".pdf", ".txt", ".md"})
 def _validate_file(file: UploadFile, max_bytes: int) -> str:
     filename = (file.filename or "").strip()
@@ -38,6 +54,84 @@ def _validate_file(file: UploadFile, max_bytes: int) -> str:
     return suffix
 @router.post("/upload", response_model=IngestUploadResponse)
 async def upload_endpoint(
     background_tasks: BackgroundTasks,
@@ -87,3 +181,83 @@ async def upload_endpoint(
         raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(exc)) from exc
     finally:
         await file.close()

 from pathlib import Path
 from tempfile import NamedTemporaryFile
 from typing import Annotated
+from urllib.parse import unquote, urlparse
+import httpx
 from fastapi import APIRouter, BackgroundTasks, File, Form, HTTPException, UploadFile, status
 from api.config import get_settings
+from models.requests import IngestUrlRequest
+from models.responses import (
+    IngestCollectionsResponse,
+    IngestDeleteCollectionResponse,
+    IngestUploadResponse,
+    CollectionItem,
+)
+from rag.vector_store import delete_collection, list_collection_names
 from storage.job_store import create_ingest_job
 from workers.ingest_worker import run_ingest_job
 _SUPPORTED_EXTENSIONS = frozenset({".pdf", ".txt", ".md"})
+# Maps a lower-cased media type (Content-Type without parameters) to the file
+# extension used for the downloaded temp file.
+_CONTENT_TYPE_SUFFIX: dict[str, str] = {
+    "application/pdf": ".pdf",
+    "text/plain": ".txt",
+    "text/markdown": ".md",
+    "text/x-markdown": ".md",
+}
 def _validate_file(file: UploadFile, max_bytes: int) -> str:
     filename = (file.filename or "").strip()
     return suffix
+def _suffix_from_url_path(url: str) -> str | None:
+    path = urlparse(url).path
+    suffix = Path(unquote(path)).suffix.lower()
+    return suffix if suffix in _SUPPORTED_EXTENSIONS else None
+def _suffix_from_content_type(content_type: str | None) -> str | None:
+    if not content_type:
+        return None
+    base = content_type.split(";")[0].strip().lower()
+    return _CONTENT_TYPE_SUFFIX.get(base)
+def _display_name_from_url(url: str, suffix: str) -> str:
+    name = Path(unquote(urlparse(url).path)).name.strip()
+    if not name or name in {"/", "."}:
+        return f"download{suffix}"
+    if Path(name).suffix.lower() not in _SUPPORTED_EXTENSIONS:
+        return f"{name}{suffix}" if not name.endswith(suffix) else name
+    return name
+async def _download_url_to_temp(url: str, max_bytes: int) -> tuple[str, str]:
+    parsed = urlparse(url)
+    if parsed.scheme not in ("http", "https"):
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="Only http and https URLs are supported.",
+        )
+    timeout = httpx.Timeout(60.0, connect=10.0)
+    limits = httpx.Limits(max_keepalive_connections=5, max_connections=5)
+    headers = {"User-Agent": "doc-audi-ai/ingest"}
+    try:
+        async with httpx.AsyncClient(timeout=timeout, limits=limits, follow_redirects=True) as client:
+            async with client.stream("GET", url, headers=headers) as response:
+                response.raise_for_status()
+                content_type = response.headers.get("content-type")
+                suffix = _suffix_from_url_path(url) or _suffix_from_content_type(content_type)
+                if not suffix:
+                    raise HTTPException(
+                        status_code=status.HTTP_400_BAD_REQUEST,
+                        detail=(
+                            "Could not determine file type from the URL path or Content-Type. "
+                            "Provide a .pdf, .txt, or .md resource with matching content-type."
+                        ),
+                    )
+                display_name = _display_name_from_url(url, suffix)
+                total = 0
+                with NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
+                    temp_path = tmp.name
+                    async for chunk in response.aiter_bytes(chunk_size=65536):
+                        total += len(chunk)
+                        if total > max_bytes:
+                            raise HTTPException(
+                                status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
+                                detail=f"Download too large. Max allowed is {max_bytes // (1024 * 1024)}MB.",
+                            )
+                        tmp.write(chunk)
+    except HTTPException:
+        raise
+    except httpx.HTTPStatusError as exc:
+        code = exc.response.status_code if exc.response else "unknown"
+        raise HTTPException(
+            status_code=status.HTTP_502_BAD_GATEWAY,
+            detail=f"Remote server returned HTTP {code}.",
+        ) from exc
+    except httpx.RequestError as exc:
+        raise HTTPException(
+            status_code=status.HTTP_502_BAD_GATEWAY,
+            detail=f"Failed to download URL: {exc}",
+        ) from exc
+    return temp_path, display_name
 @router.post("/upload", response_model=IngestUploadResponse)
 async def upload_endpoint(
     background_tasks: BackgroundTasks,
         raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(exc)) from exc
     finally:
         await file.close()
+@router.post("/url", response_model=IngestUploadResponse)
+async def ingest_url_endpoint(
+    background_tasks: BackgroundTasks,
+    payload: IngestUrlRequest,
+) -> IngestUploadResponse:
+    settings = get_settings()
+    max_bytes = settings.max_file_size_mb * 1024 * 1024
+    url_str = str(payload.url).strip()
+    temp_path = ""
+    try:
+        temp_path, display_name = await _download_url_to_temp(url_str, max_bytes)
+        job_id = await create_ingest_job(
+            settings.jobs_db_path,
+            collection_name=payload.collection_name,
+            filename=display_name,
+        )
+        background_tasks.add_task(
+            run_ingest_job,
+            job_id,
+            temp_path,
+            payload.collection_name,
+            settings.jobs_db_path,
+            settings.chroma_persist_directory,
+        )
+        return IngestUploadResponse(
+            status="queued",
+            message=f"Ingestion job accepted. Poll GET /jobs/{job_id} for status.",
+            job_id=job_id,
+            document_ids=[],
+        )
+    except HTTPException:
+        if temp_path:
+            Path(temp_path).unlink(missing_ok=True)
+        raise
+    except Exception as exc:
+        if temp_path:
+            Path(temp_path).unlink(missing_ok=True)
+        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(exc)) from exc
+@router.get("/collections", response_model=IngestCollectionsResponse)
+async def list_collections_endpoint() -> IngestCollectionsResponse:
+    settings = get_settings()
+    try:
+        names = list_collection_names(settings.chroma_persist_directory)
+    except Exception as exc:
+        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(exc)) from exc
+    items = [CollectionItem(name=n) for n in names]
+    return IngestCollectionsResponse(
+        status="success",
+        message=f"Found {len(items)} collection(s).",
+        collections=items,
+    )
+@router.delete("/collection/{collection_name}", response_model=IngestDeleteCollectionResponse)
+async def delete_collection_endpoint(collection_name: str) -> IngestDeleteCollectionResponse:
+    if not collection_name.strip():
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="collection_name is required.")
+    settings = get_settings()
+    name = collection_name.strip()
+    try:
+        existing = list_collection_names(settings.chroma_persist_directory)
+        if name not in existing:
+            raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Collection not found.")
+        delete_collection(settings.chroma_persist_directory, name)
+    except HTTPException:
+        raise
+    except Exception as exc:
+        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(exc)) from exc
+    return IngestDeleteCollectionResponse(
+        status="success",
+        message=f"Deleted collection '{name}'.",
+        collection_name=name,
+    )

api/routes/query.py CHANGED Viewed

@@ -1,31 +1,29 @@
 from fastapi import APIRouter, HTTPException, status
 from api.config import get_settings
-from models.requests import QueryRequest
 from models.responses import QueryResponse, QueryResultItem, QuerySourceItem
 from rag.embedder import create_embedding_function
-from rag.retriever import answer_with_grounding, retrieve_chunks
 from rag.vector_store import get_vector_store
 from storage.audit_store import persist_query_audit
-router = APIRouter(tags=["query"])
-@router.post("/query", response_model=QueryResponse)
-async def query_endpoint(payload: QueryRequest) -> QueryResponse:
-    settings = get_settings()
-    try:
-        embedding_function = create_embedding_function()
-        vector_store = get_vector_store(
-            persist_directory=settings.chroma_persist_directory,
-            collection_name=payload.collection_name,
-            embedding_function=embedding_function,
-        )
-        chunks = retrieve_chunks(vector_store, payload.question, settings.top_k_results)
-        answer = answer_with_grounding(settings, payload.question, chunks)
-    except Exception as exc:
-        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(exc)) from exc
     results = [QueryResultItem(text=chunk.text, score=chunk.score) for chunk in chunks]
     sources = [
         QuerySourceItem(
@@ -37,20 +35,84 @@ async def query_endpoint(payload: QueryRequest) -> QueryResponse:
         )
         for chunk in chunks
     ]
-    response = QueryResponse(
         status="success",
-        message=f"Retrieved {len(results)} chunks from '{payload.collection_name}' and generated grounded answer.",
         answer=answer,
         sources=sources,
         results=results,
     )
     try:
         await persist_query_audit(
             settings.audit_db_path,
             question=payload.question,
             collection_name=payload.collection_name,
             response=response,
         )
     except Exception as exc:
         raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(exc)) from exc
-    return response

 from fastapi import APIRouter, HTTPException, status
 from api.config import get_settings
+from models.requests import QueryRequest, SummariseRequest
 from models.responses import QueryResponse, QueryResultItem, QuerySourceItem
 from rag.embedder import create_embedding_function
+from rag.retriever import (
+    SUMMARY_RETRIEVAL_QUERY,
+    RetrievedChunk,
+    answer_with_grounding,
+    retrieve_chunks,
+    summarise_with_grounding,
+)
 from rag.vector_store import get_vector_store
 from storage.audit_store import persist_query_audit
+router = APIRouter(prefix="/query", tags=["query"])
+def _response_from_chunks(
+    *,
+    collection_name: str,
+    chunks: list[RetrievedChunk],
+    answer: str,
+    message: str,
+) -> QueryResponse:
     results = [QueryResultItem(text=chunk.text, score=chunk.score) for chunk in chunks]
     sources = [
         QuerySourceItem(
         )
         for chunk in chunks
     ]
+    return QueryResponse(
         status="success",
+        message=message,
         answer=answer,
         sources=sources,
         results=results,
     )
+# POST /query/ask: retrieve chunks for the question from the requested
+# collection, generate a grounded answer, and record the exchange in the
+# audit store before returning. A failure in either phase becomes a 500.
+@router.post("/ask", response_model=QueryResponse)
+async def ask_endpoint(payload: QueryRequest) -> QueryResponse:
+    settings = get_settings()
+    try:
+        embedding_function = create_embedding_function()
+        vector_store = get_vector_store(
+            persist_directory=settings.chroma_persist_directory,
+            collection_name=payload.collection_name,
+            embedding_function=embedding_function,
+        )
+        chunks = retrieve_chunks(vector_store, payload.question, settings.top_k_results)
+        answer = answer_with_grounding(settings, payload.question, chunks)
+    except Exception as exc:
+        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(exc)) from exc
+    response = _response_from_chunks(
+        collection_name=payload.collection_name,
+        chunks=chunks,
+        answer=answer,
+        message=(
+            f"Retrieved {len(chunks)} chunks from '{payload.collection_name}' and generated a grounded answer."
+        ),
+    )
+    # The audit write happens before the response is returned, so a failed
+    # audit write fails the request as well.
     try:
         await persist_query_audit(
             settings.audit_db_path,
+            action="query",
             question=payload.question,
             collection_name=payload.collection_name,
             response=response,
         )
     except Exception as exc:
         raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(exc)) from exc
+    return response
+@router.post("/summarise", response_model=QueryResponse)
+async def summarise_endpoint(payload: SummariseRequest) -> QueryResponse:
+    settings = get_settings()
+    retrieval_query = (payload.focus or "").strip() or SUMMARY_RETRIEVAL_QUERY
+    audit_question = payload.focus.strip() if payload.focus and payload.focus.strip() else "Summarise collection"
+    try:
+        embedding_function = create_embedding_function()
+        vector_store = get_vector_store(
+            persist_directory=settings.chroma_persist_directory,
+            collection_name=payload.collection_name,
+            embedding_function=embedding_function,
+        )
+        chunks = retrieve_chunks(vector_store, retrieval_query, settings.top_k_results)
+        answer = summarise_with_grounding(settings, focus=payload.focus, chunks=chunks)
+    except Exception as exc:
+        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(exc)) from exc
+    response = _response_from_chunks(
+        collection_name=payload.collection_name,
+        chunks=chunks,
+        answer=answer,
+        message=(
+            f"Retrieved {len(chunks)} chunks from '{payload.collection_name}' and generated a grounded summary."
+        ),
+    )
+    try:
+        await persist_query_audit(
+            settings.audit_db_path,
+            action="summarise",
+            question=audit_question,
+            collection_name=payload.collection_name,
+            response=response,
+        )
+    except Exception as exc:
+        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(exc)) from exc
+    return response

models/requests.py CHANGED Viewed

@@ -1,11 +1,44 @@
-from pydantic import BaseModel, ConfigDict, Field
-class QueryRequest(BaseModel):
     model_config = ConfigDict(extra="forbid")
     question: str = Field(min_length=1, max_length=8000, description="The question to ask the document")
-    collection_name: str = Field(default="default", min_length=1, max_length=256, description="The name of the collection to ask the question from")
 class IngestUploadRequest(BaseModel):
     model_config = ConfigDict(extra="forbid")

+from pydantic import BaseModel, ConfigDict, Field, HttpUrl
+# Request body for the question-answering route: a required question plus
+# the collection to search (defaults to "default").
+class QueryRequest(BaseModel):
     model_config = ConfigDict(extra="forbid")
     question: str = Field(min_length=1, max_length=8000, description="The question to ask the document")
+    collection_name: str = Field(
+        default="default",
+        min_length=1,
+        max_length=256,
+        description="The name of the collection to ask the question from",
+    )
+# Request body for the summarise route: the collection to summarise
+# (defaults to "default") and an optional focus string.
+class SummariseRequest(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+    collection_name: str = Field(
+        default="default",
+        min_length=1,
+        max_length=256,
+        description="Chroma collection to summarise from",
+    )
+    # No min_length here, so an empty string is accepted as a focus value.
+    focus: str | None = Field(
+        default=None,
+        max_length=8000,
+        description="Optional angle or scope for retrieval and the summary (e.g. 'contract payment terms')",
+    )
+# Request body for URL ingestion: the document URL and the target collection
+# (defaults to "default").
+class IngestUrlRequest(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+    url: HttpUrl = Field(description="HTTP(S) URL to a PDF, TXT, or Markdown document")
+    collection_name: str = Field(
+        default="default",
+        min_length=1,
+        max_length=256,
+        description="Target Chroma collection name",
+    )
 class IngestUploadRequest(BaseModel):
     model_config = ConfigDict(extra="forbid")

models/responses.py CHANGED Viewed

@@ -25,6 +25,22 @@ class IngestUploadResponse(BaseModel):
     job_id: str
     document_ids: list[str] = Field(default_factory=list)
 class JobSummary(BaseModel):
     job_id: str
     status: str

     job_id: str
     document_ids: list[str] = Field(default_factory=list)
+# One entry in a collections listing; carries only the collection's name.
+class CollectionItem(BaseModel):
+    name: str
+# Response envelope for a collections listing: status, message, and the
+# listed collections (empty list by default).
+class IngestCollectionsResponse(BaseModel):
+    status: str
+    message: str
+    collections: list[CollectionItem] = Field(default_factory=list)
+# Response envelope for a collection deletion: status, message, and the name
+# of the collection the request referred to.
+class IngestDeleteCollectionResponse(BaseModel):
+    status: str
+    message: str
+    collection_name: str
 class JobSummary(BaseModel):
     job_id: str
     status: str

pyproject.toml CHANGED Viewed

@@ -20,6 +20,7 @@ dependencies = [
     "pymupdf==1.24.3",
     "python-multipart==0.0.9",
     "aiosqlite>=0.21.0",
     "uvicorn[standard]==0.29.0",
     "huggingface-hub>=1.13.0",
     "langchain-huggingface>=0.0.3",

     "pymupdf==1.24.3",
     "python-multipart==0.0.9",
     "aiosqlite>=0.21.0",
+    "httpx>=0.27.0",
     "uvicorn[standard]==0.29.0",
     "huggingface-hub>=1.13.0",
     "langchain-huggingface>=0.0.3",

rag/retriever.py CHANGED Viewed

@@ -57,6 +57,11 @@ def retrieve_chunks(vector_store: Chroma, question: str, k: int) -> list[Retriev
     return chunks
 def answer_with_grounding(settings: Settings, question: str, chunks: list[RetrievedChunk]) -> str:
     ranked_chunks = [chunk for chunk in chunks if chunk.score is None or chunk.score >= MIN_RELEVANCE_SCORE]
     if not ranked_chunks:
@@ -84,6 +89,43 @@ def answer_with_grounding(settings: Settings, question: str, chunks: list[Retrie
     return answer or NO_MATCH_ANSWER
 def _create_chat_model(settings: Settings) -> BaseChatModel:
     provider = settings.llm_provider.lower()

     return chunks
+# Generic retrieval query for summary requests; the summarise route falls
+# back to it when the caller supplies no (or only a blank) focus.
+SUMMARY_RETRIEVAL_QUERY = (
+    "Overview of the document: main topics, key definitions, obligations, risks, and conclusions."
+)
 def answer_with_grounding(settings: Settings, question: str, chunks: list[RetrievedChunk]) -> str:
     ranked_chunks = [chunk for chunk in chunks if chunk.score is None or chunk.score >= MIN_RELEVANCE_SCORE]
     if not ranked_chunks:
     return answer or NO_MATCH_ANSWER
+def summarise_with_grounding(
+    settings: Settings,
+    *,
+    focus: str | None,
+    chunks: list[RetrievedChunk],
+) -> str:
+    ranked_chunks = [chunk for chunk in chunks if chunk.score is None or chunk.score >= MIN_RELEVANCE_SCORE]
+    if not ranked_chunks:
+        return NO_MATCH_ANSWER
+    llm = _create_chat_model(settings)
+    prompt_context = _format_context(ranked_chunks)
+    user_instruction = (
+        focus.strip()
+        if focus and focus.strip()
+        else "Summarise the main themes, structure, and important details. Use bullet points where helpful."
+    )
+    messages = [
+        SystemMessage(
+            content=(
+                "You write accurate summaries using only the provided document excerpts. "
+                "Do not invent facts. If the excerpts are insufficient, say what is missing."
+            )
+        ),
+        HumanMessage(
+            content=(
+                f"Summary request: {user_instruction}\n\n"
+                f"Document excerpts:\n{prompt_context}\n\n"
+                "Return a structured, concise summary grounded in the excerpts above."
+            )
+        ),
+    ]
+    response = llm.invoke(messages)
+    answer = _extract_message_text(response).strip()
+    return answer or NO_MATCH_ANSWER
 def _create_chat_model(settings: Settings) -> BaseChatModel:
     provider = settings.llm_provider.lower()

rag/vector_store.py CHANGED Viewed

@@ -1,9 +1,10 @@
 from pathlib import Path
 from uuid import uuid4
 from langchain_core.documents import Document
 from langchain_core.embeddings import Embeddings
-from langchain_chroma import Chroma
 def get_vector_store(
@@ -24,3 +25,15 @@ def add_documents(vector_store: Chroma, chunks: list[Document]) -> list[str]:
     vector_store.add_documents(documents=chunks, ids=document_ids)
     return document_ids

 from pathlib import Path
 from uuid import uuid4
+import chromadb
+from langchain_chroma import Chroma
 from langchain_core.documents import Document
 from langchain_core.embeddings import Embeddings
 def get_vector_store(
     vector_store.add_documents(documents=chunks, ids=document_ids)
     return document_ids
+def list_collection_names(persist_directory: str) -> list[str]:
+    Path(persist_directory).mkdir(parents=True, exist_ok=True)
+    client = chromadb.PersistentClient(path=persist_directory)
+    return sorted(c.name for c in client.list_collections())
+def delete_collection(persist_directory: str, collection_name: str) -> None:
+    Path(persist_directory).mkdir(parents=True, exist_ok=True)
+    client = chromadb.PersistentClient(path=persist_directory)
+    client.delete_collection(name=collection_name)

requirements.txt CHANGED Viewed

@@ -14,5 +14,6 @@ anthropic==0.28.1
 pymupdf==1.24.3
 python-multipart==0.0.9
 aiosqlite
 huggingface-hub
 langchain-huggingface

 pymupdf==1.24.3
 python-multipart==0.0.9
 aiosqlite
+httpx>=0.27.0
 huggingface-hub
 langchain-huggingface

storage/audit_store.py CHANGED Viewed

@@ -34,6 +34,7 @@ async def init_audit_db(db_path: str) -> None:
 async def persist_query_audit(
     db_path: str,
     *,
     question: str,
     collection_name: str,
     response: QueryResponse,
@@ -49,7 +50,7 @@ async def persist_query_audit(
             """,
             (
                 event_id,
-                "query",
                 question,
                 collection_name,
                 response.answer,

 async def persist_query_audit(
     db_path: str,
     *,
+    action: str,
     question: str,
     collection_name: str,
     response: QueryResponse,
             """,
             (
                 event_id,
+                action,
                 question,
                 collection_name,
                 response.answer,

uv.lock CHANGED Viewed

@@ -537,6 +537,7 @@ dependencies = [
     { name = "anthropic" },
     { name = "chromadb" },
     { name = "fastapi" },
     { name = "huggingface-hub" },
     { name = "langchain" },
     { name = "langchain-anthropic" },
@@ -561,6 +562,7 @@ requires-dist = [
     { name = "anthropic", specifier = "==0.28.1" },
     { name = "chromadb", specifier = "==0.5.0" },
     { name = "fastapi", specifier = "==0.111.0" },
     { name = "huggingface-hub", specifier = ">=1.13.0" },
     { name = "langchain", specifier = "==0.2.0" },
     { name = "langchain-anthropic", specifier = "==0.1.15" },
@@ -970,17 +972,18 @@ wheels = [
 [[package]]
 name = "httpx"
-version = "0.28.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },
     { name = "certifi" },
     { name = "httpcore" },
     { name = "idna" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" },
 ]
 [[package]]

     { name = "anthropic" },
     { name = "chromadb" },
     { name = "fastapi" },
+    { name = "httpx" },
     { name = "huggingface-hub" },
     { name = "langchain" },
     { name = "langchain-anthropic" },
     { name = "anthropic", specifier = "==0.28.1" },
     { name = "chromadb", specifier = "==0.5.0" },
     { name = "fastapi", specifier = "==0.111.0" },
+    { name = "httpx", specifier = ">=0.27.0" },
     { name = "huggingface-hub", specifier = ">=1.13.0" },
     { name = "langchain", specifier = "==0.2.0" },
     { name = "langchain-anthropic", specifier = "==0.1.15" },
 [[package]]
 name = "httpx"
+version = "0.27.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },
     { name = "certifi" },
     { name = "httpcore" },
     { name = "idna" },
+    { name = "sniffio" },
 ]
+sdist = { url = "https://files.pythonhosted.org/packages/5c/2d/3da5bdf4408b8b2800061c339f240c1802f2e82d55e50bd39c5a881f47f0/httpx-0.27.0.tar.gz", hash = "sha256:a0cb88a46f32dc874e04ee956e4c2764aba2aa228f650b06788ba6bda2962ab5", size = 126413, upload-time = "2024-02-21T13:07:52.434Z" }
 wheels = [
+    { url = "https://files.pythonhosted.org/packages/41/7b/ddacf6dcebb42466abd03f368782142baa82e08fc0c1f8eaa05b4bae87d5/httpx-0.27.0-py3-none-any.whl", hash = "sha256:71d5465162c13681bff01ad59b2cc68dd838ea1f10e51574bac27103f00c91a5", size = 75590, upload-time = "2024-02-21T13:07:50.455Z" },
 ]
 [[package]]