Sibi Krishnamoorthy committed on
Commit
c9ed90a
·
1 Parent(s): d5b86e1

Add application file

Browse files
.dockerignore ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ *.pyd
5
+ venv/
6
+ .env
7
+ .git/
8
+ .gitignore
9
+ .DS_Store
10
+ *.pdf
11
+ *.zip
12
+ data/
.gitignore ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data/
2
+ dep/
3
+ # Byte-compiled / optimized / DLL files
4
+ __pycache__/
5
+ *.py[cod]
6
+ *$py.class
7
+
8
+ # C extensions
9
+ *.so
10
+
11
+ # Distribution / packaging
12
+ .Python
13
+ build/
14
+ develop-eggs/
15
+ dist/
16
+ downloads/
17
+ eggs/
18
+ .eggs/
19
+ lib/
20
+ lib64/
21
+ parts/
22
+ sdist/
23
+ var/
24
+ wheels/
25
+ share/python-wheels/
26
+ *.egg-info/
27
+ .installed.cfg
28
+ *.egg
29
+ MANIFEST
30
+
31
+ # PyInstaller
32
+ # Usually these files are written by a python script from a template
33
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
34
+ *.manifest
35
+ *.spec
36
+
37
+ # Installer logs
38
+ pip-log.txt
39
+ pip-delete-this-directory.txt
40
+
41
+ # Unit test / coverage reports
42
+ htmlcov/
43
+ .tox/
44
+ .nox/
45
+ .coverage
46
+ .coverage.*
47
+ .cache
48
+ nosetests.xml
49
+ coverage.xml
50
+ *.cover
51
+ *.py,cover
52
+ .hypothesis/
53
+ .pytest_cache/
54
+ cover/
55
+
56
+ # Translations
57
+ *.mo
58
+ *.pot
59
+
60
+ # Django stuff:
61
+ *.log
62
+ local_settings.py
63
+ db.sqlite3
64
+ db.sqlite3-journal
65
+
66
+ # Flask stuff:
67
+ instance/
68
+ .webassets-cache
69
+
70
+ # Scrapy stuff:
71
+ .scrapy
72
+
73
+ # Sphinx documentation
74
+ docs/_build/
75
+
76
+ # PyBuilder
77
+ .pybuilder/
78
+ target/
79
+
80
+ # Jupyter Notebook
81
+ .ipynb_checkpoints
82
+
83
+ # IPython
84
+ profile_default/
85
+ ipython_config.py
86
+
87
+ # pyenv
88
+ # For a library or package, you might want to ignore these files since the code is
89
+ # intended to run in multiple environments; otherwise, check them in:
90
+ # .python-version
91
+
92
+ # pipenv
93
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
95
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
96
+ # install all needed dependencies.
97
+ #Pipfile.lock
98
+
99
+ # UV
100
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
101
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
102
+ # commonly ignored for libraries.
103
+ #uv.lock
104
+
105
+ # poetry
106
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
107
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
108
+ # commonly ignored for libraries.
109
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
110
+ #poetry.lock
111
+
112
+ # pdm
113
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
114
+ #pdm.lock
115
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
116
+ # in version control.
117
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
118
+ .pdm.toml
119
+ .pdm-python
120
+ .pdm-build/
121
+
122
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
123
+ __pypackages__/
124
+
125
+ # Celery stuff
126
+ celerybeat-schedule
127
+ celerybeat.pid
128
+
129
+ # SageMath parsed files
130
+ *.sage.py
131
+
132
+ # Environments
133
+ .env
134
+ .venv
135
+ env/
136
+ venv/
137
+ ENV/
138
+ env.bak/
139
+ venv.bak/
140
+
141
+ # Spyder project settings
142
+ .spyderproject
143
+ .spyproject
144
+
145
+ # Rope project settings
146
+ .ropeproject
147
+
148
+ # mkdocs documentation
149
+ /site
150
+
151
+ # mypy
152
+ .mypy_cache/
153
+ .dmypy.json
154
+ dmypy.json
155
+
156
+ # Pyre type checker
157
+ .pyre/
158
+
159
+ # pytype static type analyzer
160
+ .pytype/
161
+
162
+ # Cython debug symbols
163
+ cython_debug/
164
+
165
+ # PyCharm
166
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
167
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
168
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
169
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
170
+ #.idea/
171
+
172
+ # PyPI configuration file
173
+ .pypirc
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.13
Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Use official Python image
FROM python:3.11-slim

# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# Set work directory
WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies first (separate layer so dependency installs
# are cached and only re-run when requirements.txt changes)
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip && pip install --no-cache-dir -r requirements.txt

# Copy project
COPY . .

# Expose port
EXPOSE 8000

# Run the application.
# Fix: removed --reload — auto-reload is a development-only feature that adds
# file-watching overhead and is inappropriate inside a production container.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
app/api/models/chat.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from pydantic import BaseModel, Field
from typing import List, Optional
from datetime import datetime, timezone

class ChatMessage(BaseModel):
    """A single message in a chat exchange (one turn by user or assistant)."""
    role: str = Field(..., description="Role of the message sender (user/assistant)")
    content: str = Field(..., description="Content of the message")
    # Fix: datetime.utcnow() is deprecated since Python 3.12 and returns a
    # naive datetime; use an explicit timezone-aware UTC timestamp instead.
    timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))

class ChatRequest(BaseModel):
    """Request payload for the /chat endpoint."""
    query: str = Field(..., description="User's query about invoice reimbursements")
    chat_history: Optional[List[ChatMessage]] = Field(default=None, description="Previous chat messages for context")

class ChatResponse(BaseModel):
    """Response payload returned by the /chat endpoint."""
    response: str = Field(..., description="Assistant's response in markdown format")
    relevant_invoices: Optional[List[str]] = Field(default=None, description="List of relevant invoice IDs referenced in the response")
    chat_history: List[ChatMessage] = Field(..., description="Updated chat history including the new exchange")
app/api/models/invoice.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from pydantic import BaseModel, Field
from typing import List, Optional
from enum import Enum
from datetime import datetime, timezone

class ReimbursementStatus(str, Enum):
    """Outcome of analyzing an invoice against the reimbursement policy."""
    FULLY_REIMBURSED = "FULLY_REIMBURSED"
    PARTIALLY_REIMBURSED = "PARTIALLY_REIMBURSED"
    DECLINED = "DECLINED"

class InvoiceAnalysis(BaseModel):
    """Result of one invoice-vs-policy analysis, persisted in the vector store."""
    invoice_id: str = Field(..., description="Unique identifier for the invoice")
    employee_name: str = Field(..., description="Name of the employee")
    invoice_date: datetime = Field(..., description="Date of the invoice")
    total_amount: float = Field(..., description="Total amount of the invoice")
    reimbursable_amount: float = Field(..., description="Amount that can be reimbursed")
    status: ReimbursementStatus = Field(..., description="Reimbursement status")
    reason: str = Field(..., description="Detailed reason for the reimbursement status")
    policy_violations: Optional[List[str]] = Field(default=None, description="List of policy violations if any")
    # Fix: datetime.utcnow() is deprecated since Python 3.12 and returns a
    # naive datetime; use an explicit timezone-aware UTC timestamp instead.
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))

class InvoiceAnalysisResponse(BaseModel):
    """Response payload returned by the /analyze-invoice endpoint."""
    success: bool = Field(..., description="Whether the analysis was successful")
    message: str = Field(..., description="Response message")
    analysis: Optional[InvoiceAnalysis] = Field(default=None, description="Analysis results if successful")

class InvoiceAnalysisRequest(BaseModel):
    """Request model describing the inputs required to analyze one invoice."""
    employee_name: str = Field(..., description="Name of the employee")
    policy_text: str = Field(..., description="Text content of the reimbursement policy")
    invoice_text: str = Field(..., description="Text content of the invoice")
app/api/routes/chat.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from fastapi import APIRouter, HTTPException
from app.api.models.chat import ChatRequest, ChatResponse
from app.services.chatbot import Chatbot
from app.services.vector_store import VectorStore

router = APIRouter()

# Module-level singletons shared by all requests handled by this router.
vector_store = VectorStore()
chatbot = Chatbot(vector_store)

@router.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest) -> ChatResponse:
    """Answer a user query about stored invoice analyses.

    Delegates to the Chatbot service, which retrieves relevant analyses from
    the vector store and produces a markdown response plus updated history.
    """
    try:
        return await chatbot.process_query(
            query=request.query,
            chat_history=request.chat_history
        )
    except HTTPException:
        # Fix: do not re-wrap deliberate HTTP errors (status/detail would be
        # lost by the blanket 500 handler below).
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Error processing chat request: {str(e)}"
        )
app/api/routes/invoice.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from fastapi import APIRouter, UploadFile, File, Form, HTTPException
from app.api.models.invoice import InvoiceAnalysisRequest, InvoiceAnalysisResponse
from app.services.invoice_processor import InvoiceProcessor
from app.services.vector_store import VectorStore
from app.utils.pdf_processor import extract_text_from_pdf
import zipfile
import io
from typing import List

router = APIRouter()

# Module-level singletons shared by all requests handled by this router.
invoice_processor = InvoiceProcessor()
vector_store = VectorStore()

@router.post("/analyze-invoice", response_model=InvoiceAnalysisResponse)
async def analyze_invoice(
    policy_file: UploadFile = File(...),
    invoice_files: UploadFile = File(...),
    employee_name: str = Form(...)
):
    """Analyze every PDF invoice inside a ZIP archive against a policy PDF.

    Each analysis is persisted to the vector store for later chatbot queries.
    NOTE(review): the response schema only carries a single analysis, so only
    the first result is embedded in the body; the message reports the count.
    """
    try:
        # Read and extract the policy text.
        policy_content = await policy_file.read()
        policy_text = extract_text_from_pdf(policy_content)

        # Open the uploaded ZIP; a malformed archive is a client error (400),
        # not a server error (500).
        try:
            zip_ref = zipfile.ZipFile(io.BytesIO(await invoice_files.read()))
        except zipfile.BadZipFile:
            raise HTTPException(
                status_code=400,
                detail="invoice_files must be a valid ZIP archive"
            )

        invoice_analyses = []
        with zip_ref:
            for filename in zip_ref.namelist():
                base_name = filename.rsplit("/", 1)[-1]
                # Fix: match '.PDF' as well as '.pdf' (case-insensitive), and
                # skip macOS metadata entries ('__MACOSX/...', '._foo.pdf')
                # that are not real invoices.
                if not filename.lower().endswith(".pdf"):
                    continue
                if filename.startswith("__MACOSX/") or base_name.startswith("."):
                    continue

                # Extract and process each PDF.
                with zip_ref.open(filename) as pdf_file:
                    invoice_text = extract_text_from_pdf(pdf_file.read())

                # Analyze invoice against the policy.
                analysis = await invoice_processor.analyze_invoice(
                    employee_name=employee_name,
                    policy_text=policy_text,
                    invoice_text=invoice_text
                )

                # Store in vector database for semantic search by the chatbot.
                vector_store.store_analysis(analysis)
                invoice_analyses.append(analysis)

        return InvoiceAnalysisResponse(
            success=True,
            message=f"Successfully analyzed {len(invoice_analyses)} invoices",
            analysis=invoice_analyses[0] if invoice_analyses else None
        )

    except HTTPException:
        # Preserve deliberate HTTP errors (e.g. the 400 for a bad ZIP).
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Error processing invoices: {str(e)}"
        )
app/core/config.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from pydantic_settings import BaseSettings, SettingsConfigDict
from typing import Optional
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if present) before settings are read.
load_dotenv()

class Settings(BaseSettings):
    """Central application configuration, sourced from environment variables."""

    # API Settings
    API_V1_STR: str = "/api/v1"
    PROJECT_NAME: str = "Invoice Reimbursement System"

    # OpenAI-compatible API settings; the base URL allows pointing at
    # alternative providers (e.g. a Mistral endpoint).
    OPENAI_API_KEY: str = os.getenv("OPENAI_API_KEY", "")
    OPENAI_BASE_URL: str = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")

    # Vector Store Settings
    VECTOR_STORE_PATH: str = "data/vector_store"

    # LLM Settings
    LLM_MODEL_NAME: str = "mistral-large-latest"
    EMBEDDING_MODEL_NAME: str = "text-embedding-3-small"

    # File Upload Settings
    MAX_UPLOAD_SIZE: int = 10 * 1024 * 1024  # 10MB
    ALLOWED_EXTENSIONS: set = {"pdf", "zip"}

    # Chat Settings
    MAX_CHAT_HISTORY: int = 10

    # Fix: pydantic-settings 2.x (pinned in pyproject) uses model_config with
    # SettingsConfigDict; the inner `class Config` style is the deprecated
    # pydantic v1 idiom.
    model_config = SettingsConfigDict(case_sensitive=True)

# Singleton settings instance imported throughout the application.
settings = Settings()
app/main.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from app.core.config import settings
from app.api.routes import invoice, chat
import logging

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


app = FastAPI(
    title=settings.PROJECT_NAME,
    openapi_url=f"{settings.API_V1_STR}/openapi.json"
)

# Configure CORS
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, replace with specific origins
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routers
app.include_router(invoice.router, prefix=settings.API_V1_STR, tags=["invoice"])
app.include_router(chat.router, prefix=settings.API_V1_STR, tags=["chat"])

@app.exception_handler(Exception)
async def validation_exception_handler(request: Request, exc: Exception) -> JSONResponse:
    """Catch-all handler: log the error and return a generic 500 response.

    Fix: JSONResponse was referenced here without being imported, so the
    handler itself raised NameError whenever it fired.
    """
    logger.error(f"FastAPI error: {exc}")
    return JSONResponse(
        status_code=500,
        content={"message": "Internal server error"},
    )

@app.get("/")
async def root():
    """Landing endpoint pointing clients at the interactive API docs."""
    return {
        "message": "Welcome to the Invoice Reimbursement System API",
        "docs_url": "/docs",
        "redoc_url": "/redoc"
    }

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
app/services/chatbot.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from app.api.models.chat import ChatMessage, ChatResponse
from app.api.models.invoice import InvoiceAnalysis
from app.services.vector_store import VectorStore
from typing import List, Optional
from app.core.config import settings
from datetime import datetime

class Chatbot:
    """RAG-style chatbot that answers questions about stored invoice analyses."""

    def __init__(self, vector_store: VectorStore):
        # Chat model pointed at the configured OpenAI-compatible endpoint.
        self.llm = ChatOpenAI(
            model_name=settings.LLM_MODEL_NAME,
            base_url=settings.OPENAI_BASE_URL,
            temperature=0.7
        )
        self.vector_store = vector_store

        self.chat_prompt = ChatPromptTemplate.from_messages([
            ("system", """You are an AI assistant specialized in helping users query and understand invoice reimbursement analyses.
            You have access to a database of invoice analyses and can provide detailed information about them.

            When responding:
            1. Use markdown formatting for better readability
            2. Be concise but informative
            3. If you reference specific invoices, mention their IDs
            4. If you're unsure about something, say so
            5. Use the provided context to answer questions accurately

            Previous conversation:
            {chat_history}

            Relevant invoice analyses:
            {invoice_analyses}

            User query: {query}
            """),
        ])

    async def process_query(
        self,
        query: str,
        chat_history: Optional[List[ChatMessage]] = None
    ) -> ChatResponse:
        """Answer `query` using retrieved invoice analyses plus prior chat turns.

        Returns a ChatResponse with the assistant reply, the IDs of the
        invoices used as context, and the updated chat history.
        """
        # Retrieve analyses semantically related to the query.
        relevant_analyses = self.vector_store.search_analyses(query)

        # Flatten prior turns into "role: content" lines for the prompt.
        formatted_history = ""
        if chat_history:
            formatted_history = "\n".join(
                f"{msg.role}: {msg.content}" for msg in chat_history
            )

        # Format retrieved analyses. Fix: tell the model explicitly when the
        # search returned nothing, instead of injecting an empty context block.
        if relevant_analyses:
            formatted_analyses = "\n\n".join(
                f"Invoice ID: {analysis.invoice_id}\n"
                f"Employee: {analysis.employee_name}\n"
                f"Status: {analysis.status}\n"
                f"Amount: ${analysis.total_amount}\n"
                f"Reimbursable: ${analysis.reimbursable_amount}\n"
                f"Reason: {analysis.reason}"
                for analysis in relevant_analyses
            )
        else:
            formatted_analyses = "No matching invoice analyses were found."

        # Fill in the prompt template.
        prompt = self.chat_prompt.format_messages(
            chat_history=formatted_history,
            invoice_analyses=formatted_analyses,
            query=query
        )

        # Get the LLM response.
        response = await self.llm.ainvoke(prompt)

        # Build the updated history from a copy (never mutate the caller's list).
        new_messages: List[ChatMessage] = list(chat_history) if chat_history else []
        new_messages.extend([
            ChatMessage(role="user", content=query),
            ChatMessage(role="assistant", content=response.content)
        ])

        return ChatResponse(
            response=response.content,
            relevant_invoices=[analysis.invoice_id for analysis in relevant_analyses],
            chat_history=new_messages
        )
app/services/invoice_processor.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import PydanticOutputParser
from app.api.models.invoice import InvoiceAnalysis, ReimbursementStatus
from app.core.config import settings
import uuid
from datetime import datetime, timezone
import json

class InvoiceProcessor:
    """Analyzes invoice text against a reimbursement policy using an LLM."""

    def __init__(self):
        # temperature=0 for deterministic policy decisions.
        # Fix: pass the configured base_url so the custom OpenAI-compatible
        # endpoint (e.g. Mistral) is actually used — consistent with Chatbot;
        # previously the Mistral model name was sent to the default OpenAI URL.
        self.llm = ChatOpenAI(
            model_name=settings.LLM_MODEL_NAME,
            base_url=settings.OPENAI_BASE_URL,
            temperature=0
        )
        # Parses the LLM output directly into an InvoiceAnalysis model.
        self.parser = PydanticOutputParser(pydantic_object=InvoiceAnalysis)

        self.analysis_prompt = ChatPromptTemplate.from_messages([
            ("system", """You are an expert at analyzing expense invoices against company reimbursement policies.
            Your task is to analyze the given invoice against the provided policy and determine:
            1. The reimbursement status (FULLY_REIMBURSED, PARTIALLY_REIMBURSED, or DECLINED)
            2. The reimbursable amount
            3. Detailed reasons for the decision
            4. Any policy violations found

            Policy:
            {policy_text}

            Invoice:
            {invoice_text}

            {format_instructions}
            """),
        ])

    async def analyze_invoice(self, employee_name: str, policy_text: str, invoice_text: str) -> InvoiceAnalysis:
        """Run one invoice through the LLM and return a structured analysis.

        Never raises on parse failure: a DECLINED fallback analysis carrying
        the error message is returned instead, so batch processing continues.
        """
        # Generate a unique invoice ID.
        invoice_id = str(uuid.uuid4())

        # Fill in the analysis prompt, including the parser's format spec.
        prompt = self.analysis_prompt.format_messages(
            policy_text=policy_text,
            invoice_text=invoice_text,
            format_instructions=self.parser.get_format_instructions()
        )

        # Get the LLM response.
        response = await self.llm.ainvoke(prompt)

        # Parse the response into the structured model.
        try:
            analysis = self.parser.parse(response.content)
            # Overwrite identity fields — the LLM cannot know these.
            analysis.invoice_id = invoice_id
            analysis.employee_name = employee_name
            return analysis
        except Exception as e:
            # If parsing fails, return a safe DECLINED placeholder so callers
            # still receive a well-formed result.
            return InvoiceAnalysis(
                invoice_id=invoice_id,
                employee_name=employee_name,
                # Fix: datetime.utcnow() is deprecated since Python 3.12.
                invoice_date=datetime.now(timezone.utc),
                total_amount=0.0,
                reimbursable_amount=0.0,
                status=ReimbursementStatus.DECLINED,
                reason=f"Error analyzing invoice: {str(e)}",
                policy_violations=["Failed to parse invoice"]
            )
app/services/vector_store.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import chromadb
from chromadb.config import Settings
from app.core.config import settings
from app.api.models.invoice import InvoiceAnalysis
import json
from typing import List, Dict, Any, Optional
import os

class VectorStore:
    """ChromaDB-backed persistence and semantic search for invoice analyses."""

    def __init__(self):
        # Ensure the vector store directory exists.
        os.makedirs(settings.VECTOR_STORE_PATH, exist_ok=True)

        # Initialize the persistent ChromaDB client.
        self.client = chromadb.PersistentClient(
            path=settings.VECTOR_STORE_PATH,
            settings=Settings(
                anonymized_telemetry=False
            )
        )

        # Create or get the collection (cosine distance for text similarity).
        self.collection = self.client.get_or_create_collection(
            name="invoice_analyses",
            metadata={"hnsw:space": "cosine"}
        )

    @staticmethod
    def _decode_metadata(metadata: Dict[str, Any]) -> Dict[str, Any]:
        """Parse JSON-string fields stored in Chroma back into lists/dicts."""
        for key, value in metadata.items():
            if isinstance(value, str):
                try:
                    parsed = json.loads(value)
                    # Only replace if the parsed value is a list or dict;
                    # plain strings that happen to be valid JSON stay as-is.
                    if isinstance(parsed, (list, dict)):
                        metadata[key] = parsed
                except (json.JSONDecodeError, TypeError):
                    pass
        return metadata

    def store_analysis(self, analysis: InvoiceAnalysis) -> None:
        """Store an invoice analysis in the vector store."""
        # Fix: .dict() is deprecated in pydantic v2 (pinned in pyproject);
        # model_dump() is the supported replacement.
        metadata = analysis.model_dump()
        # Remove None values (Chroma metadata cannot hold nulls).
        metadata = {k: v for k, v in metadata.items() if v is not None}

        # Convert datetimes to ISO strings and lists/dicts to JSON strings,
        # since Chroma metadata only supports scalar values.
        for key, value in metadata.items():
            if isinstance(value, (list, dict)):
                metadata[key] = json.dumps(value)
            elif hasattr(value, 'isoformat'):
                metadata[key] = value.isoformat()

        # Human-readable document text used for the embedding.
        doc_text = f"""
        Invoice Analysis for {analysis.employee_name}
        Status: {analysis.status}
        Total Amount: {analysis.total_amount}
        Reimbursable Amount: {analysis.reimbursable_amount}
        Reason: {analysis.reason}
        Policy Violations: {', '.join(analysis.policy_violations) if analysis.policy_violations else 'None'}
        """

        # Store in the vector database, keyed by invoice ID.
        self.collection.add(
            ids=[analysis.invoice_id],
            documents=[doc_text],
            metadatas=[metadata]
        )

    def search_analyses(
        self,
        query: str,
        n_results: int = 5,
        where: Optional[Dict[str, Any]] = None
    ) -> List[InvoiceAnalysis]:
        """Search for invoice analyses using semantic search and optional
        metadata filtering; returns reconstructed InvoiceAnalysis objects."""
        results = self.collection.query(
            query_texts=[query],
            n_results=n_results,
            where=where
        )

        # Convert stored metadata back into InvoiceAnalysis objects.
        return [
            InvoiceAnalysis(**self._decode_metadata(metadata))
            for metadata in results['metadatas'][0]
        ]

    def get_analysis_by_id(self, invoice_id: str) -> InvoiceAnalysis:
        """Retrieve a specific invoice analysis by ID.

        Raises:
            ValueError: if no analysis exists for the given ID.
        """
        result = self.collection.get(
            ids=[invoice_id]
        )

        if not result['metadatas']:
            raise ValueError(f"No analysis found for invoice ID: {invoice_id}")
        return InvoiceAnalysis(**self._decode_metadata(result['metadatas'][0]))
app/utils/pdf_processor.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from pypdf import PdfReader
from io import BytesIO
from typing import Union

def extract_text_from_pdf(pdf_content: Union[bytes, BytesIO]) -> str:
    """
    Extract text content from a PDF file.

    Args:
        pdf_content: PDF file content as bytes or BytesIO object

    Returns:
        str: Extracted text content (pages joined by newlines)

    Raises:
        ValueError: if the content cannot be parsed as a PDF
    """
    try:
        # Convert bytes to BytesIO if necessary.
        if isinstance(pdf_content, bytes):
            pdf_content = BytesIO(pdf_content)

        # Create PDF reader.
        pdf_reader = PdfReader(pdf_content)

        # Extract text from all pages.
        # Fix: extract_text() may return None for image-only pages, which
        # previously made "\n".join(...) raise a TypeError.
        text_content = [page.extract_text() or "" for page in pdf_reader.pages]

        return "\n".join(text_content)

    except Exception as e:
        # Chain the original exception so the root cause stays visible.
        raise ValueError(f"Error extracting text from PDF: {str(e)}") from e
docker-compose.yml ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Fix: the top-level `version` key is obsolete with Docker Compose v2 and
# only produces a deprecation warning, so it has been removed.
services:
  app:
    build: .
    container_name: invoice-reimbursement-system
    env_file:
      - .env
    ports:
      - "8000:8000"
    volumes:
      # Persist the Chroma vector store and uploaded data across restarts.
      - ./data:/app/data
    restart: unless-stopped
main.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
def main():
    """Simple smoke-test entry point left by the project template."""
    # Fix: corrected the misspelled project name ("rembuiresment").
    print("Hello from reimbursement-system!")


if __name__ == "__main__":
    main()
pyproject.toml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
[project]
name = "invoice-reimbursement-system"
version = "0.1.0"
description = "API for analyzing employee invoices against reimbursement policies"
readme = "README.md"
# Fix: aligned with the Docker base image (python:3.11-slim); the previous
# ">=3.13" contradicted the deployment environment and the pinned
# numpy==1.26.4 pulled in transitively via sentence-transformers.
requires-python = ">=3.11"
dependencies = [
    "bcrypt==4.1.2",
    "chromadb==0.4.22",
    "fastapi==0.109.2",
    "langchain==0.1.9",
    "langchain-openai==0.0.8",
    "passlib==1.7.4",
    "pydantic>=2.7.0",
    "pydantic-settings==2.9.1",
    "pypdf==4.0.1",
    "python-dotenv==1.0.1",
    "python-jose[cryptography]==3.3.0",
    "python-multipart==0.0.9",
    "sentence-transformers==2.5.1",
    "uvicorn==0.27.1",
]
requirements.txt ADDED
@@ -0,0 +1,495 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file was autogenerated by uv via the following command:
2
+ # uv pip compile pyproject.toml -o requirements.txt
3
+ aiohappyeyeballs==2.6.1
4
+ # via aiohttp
5
+ aiohttp==3.12.9
6
+ # via
7
+ # langchain
8
+ # langchain-community
9
+ aiosignal==1.3.2
10
+ # via aiohttp
11
+ annotated-types==0.7.0
12
+ # via pydantic
13
+ anyio==4.9.0
14
+ # via
15
+ # httpx
16
+ # openai
17
+ # starlette
18
+ # watchfiles
19
+ asgiref==3.8.1
20
+ # via opentelemetry-instrumentation-asgi
21
+ attrs==25.3.0
22
+ # via aiohttp
23
+ backoff==2.2.1
24
+ # via posthog
25
+ bcrypt==4.1.2
26
+ # via
27
+ # invoice-reimbursement-system (pyproject.toml)
28
+ # chromadb
29
+ build==1.2.2.post1
30
+ # via chromadb
31
+ cachetools==5.5.2
32
+ # via google-auth
33
+ certifi==2025.4.26
34
+ # via
35
+ # httpcore
36
+ # httpx
37
+ # kubernetes
38
+ # pulsar-client
39
+ # requests
40
+ cffi==1.17.1
41
+ # via cryptography
42
+ charset-normalizer==3.4.2
43
+ # via requests
44
+ chroma-hnswlib==0.7.3
45
+ # via chromadb
46
+ chromadb==0.4.22
47
+ # via invoice-reimbursement-system (pyproject.toml)
48
+ click==8.2.1
49
+ # via
50
+ # typer
51
+ # uvicorn
52
+ coloredlogs==15.0.1
53
+ # via onnxruntime
54
+ cryptography==45.0.3
55
+ # via python-jose
56
+ dataclasses-json==0.6.7
57
+ # via
58
+ # langchain
59
+ # langchain-community
60
+ distro==1.9.0
61
+ # via
62
+ # openai
63
+ # posthog
64
+ durationpy==0.10
65
+ # via kubernetes
66
+ ecdsa==0.19.1
67
+ # via python-jose
68
+ fastapi==0.109.2
69
+ # via
70
+ # invoice-reimbursement-system (pyproject.toml)
71
+ # chromadb
72
+ filelock==3.18.0
73
+ # via
74
+ # huggingface-hub
75
+ # torch
76
+ # transformers
77
+ flatbuffers==25.2.10
78
+ # via onnxruntime
79
+ frozenlist==1.6.2
80
+ # via
81
+ # aiohttp
82
+ # aiosignal
83
+ fsspec==2025.5.1
84
+ # via
85
+ # huggingface-hub
86
+ # torch
87
+ google-auth==2.40.3
88
+ # via kubernetes
89
+ googleapis-common-protos==1.70.0
90
+ # via opentelemetry-exporter-otlp-proto-grpc
91
+ greenlet==3.2.3
92
+ # via sqlalchemy
93
+ grpcio==1.72.1
94
+ # via
95
+ # chromadb
96
+ # opentelemetry-exporter-otlp-proto-grpc
97
+ h11==0.16.0
98
+ # via
99
+ # httpcore
100
+ # uvicorn
101
+ hf-xet==1.1.3
102
+ # via huggingface-hub
103
+ httpcore==1.0.9
104
+ # via httpx
105
+ httptools==0.6.4
106
+ # via uvicorn
107
+ httpx==0.28.1
108
+ # via
109
+ # langsmith
110
+ # openai
111
+ huggingface-hub==0.32.4
112
+ # via
113
+ # sentence-transformers
114
+ # tokenizers
115
+ # transformers
116
+ humanfriendly==10.0
117
+ # via coloredlogs
118
+ idna==3.10
119
+ # via
120
+ # anyio
121
+ # httpx
122
+ # requests
123
+ # yarl
124
+ importlib-metadata==8.7.0
125
+ # via opentelemetry-api
126
+ importlib-resources==6.5.2
127
+ # via chromadb
128
+ jinja2==3.1.6
129
+ # via torch
130
+ jiter==0.10.0
131
+ # via openai
132
+ joblib==1.5.1
133
+ # via scikit-learn
134
+ jsonpatch==1.33
135
+ # via
136
+ # langchain
137
+ # langchain-core
138
+ jsonpointer==3.0.0
139
+ # via jsonpatch
140
+ kubernetes==32.0.1
141
+ # via chromadb
142
+ langchain==0.1.9
143
+ # via invoice-reimbursement-system (pyproject.toml)
144
+ langchain-community==0.0.38
145
+ # via langchain
146
+ langchain-core==0.1.53
147
+ # via
148
+ # langchain
149
+ # langchain-community
150
+ # langchain-openai
151
+ langchain-openai==0.0.8
152
+ # via invoice-reimbursement-system (pyproject.toml)
153
+ langsmith==0.1.147
154
+ # via
155
+ # langchain
156
+ # langchain-community
157
+ # langchain-core
158
+ markdown-it-py==3.0.0
159
+ # via rich
160
+ markupsafe==3.0.2
161
+ # via jinja2
162
+ marshmallow==3.26.1
163
+ # via dataclasses-json
164
+ mdurl==0.1.2
165
+ # via markdown-it-py
166
+ mmh3==5.1.0
167
+ # via chromadb
168
+ mpmath==1.3.0
169
+ # via sympy
170
+ multidict==6.4.4
171
+ # via
172
+ # aiohttp
173
+ # yarl
174
+ mypy-extensions==1.1.0
175
+ # via typing-inspect
176
+ networkx==3.5
177
+ # via torch
178
+ numpy==1.26.4
179
+ # via
180
+ # chroma-hnswlib
181
+ # chromadb
182
+ # langchain
183
+ # langchain-community
184
+ # onnxruntime
185
+ # scikit-learn
186
+ # scipy
187
+ # sentence-transformers
188
+ # transformers
189
+ nvidia-cublas-cu12==12.6.4.1
190
+ # via
191
+ # nvidia-cudnn-cu12
192
+ # nvidia-cusolver-cu12
193
+ # torch
194
+ nvidia-cuda-cupti-cu12==12.6.80
195
+ # via torch
196
+ nvidia-cuda-nvrtc-cu12==12.6.77
197
+ # via torch
198
+ nvidia-cuda-runtime-cu12==12.6.77
199
+ # via torch
200
+ nvidia-cudnn-cu12==9.5.1.17
201
+ # via torch
202
+ nvidia-cufft-cu12==11.3.0.4
203
+ # via torch
204
+ nvidia-cufile-cu12==1.11.1.6
205
+ # via torch
206
+ nvidia-curand-cu12==10.3.7.77
207
+ # via torch
208
+ nvidia-cusolver-cu12==11.7.1.2
209
+ # via torch
210
+ nvidia-cusparse-cu12==12.5.4.2
211
+ # via
212
+ # nvidia-cusolver-cu12
213
+ # torch
214
+ nvidia-cusparselt-cu12==0.6.3
215
+ # via torch
216
+ nvidia-nccl-cu12==2.26.2
217
+ # via torch
218
+ nvidia-nvjitlink-cu12==12.6.85
219
+ # via
220
+ # nvidia-cufft-cu12
221
+ # nvidia-cusolver-cu12
222
+ # nvidia-cusparse-cu12
223
+ # torch
224
+ nvidia-nvtx-cu12==12.6.77
225
+ # via torch
226
+ oauthlib==3.2.2
227
+ # via
228
+ # kubernetes
229
+ # requests-oauthlib
230
+ onnxruntime==1.22.0
231
+ # via chromadb
232
+ openai==1.84.0
233
+ # via langchain-openai
234
+ opentelemetry-api==1.34.0
235
+ # via
236
+ # chromadb
237
+ # opentelemetry-exporter-otlp-proto-grpc
238
+ # opentelemetry-instrumentation
239
+ # opentelemetry-instrumentation-asgi
240
+ # opentelemetry-instrumentation-fastapi
241
+ # opentelemetry-sdk
242
+ # opentelemetry-semantic-conventions
243
+ opentelemetry-exporter-otlp-proto-common==1.34.0
244
+ # via opentelemetry-exporter-otlp-proto-grpc
245
+ opentelemetry-exporter-otlp-proto-grpc==1.34.0
246
+ # via chromadb
247
+ opentelemetry-instrumentation==0.55b0
248
+ # via
249
+ # opentelemetry-instrumentation-asgi
250
+ # opentelemetry-instrumentation-fastapi
251
+ opentelemetry-instrumentation-asgi==0.55b0
252
+ # via opentelemetry-instrumentation-fastapi
253
+ opentelemetry-instrumentation-fastapi==0.55b0
254
+ # via chromadb
255
+ opentelemetry-proto==1.34.0
256
+ # via
257
+ # opentelemetry-exporter-otlp-proto-common
258
+ # opentelemetry-exporter-otlp-proto-grpc
259
+ opentelemetry-sdk==1.34.0
260
+ # via
261
+ # chromadb
262
+ # opentelemetry-exporter-otlp-proto-grpc
263
+ opentelemetry-semantic-conventions==0.55b0
264
+ # via
265
+ # opentelemetry-instrumentation
266
+ # opentelemetry-instrumentation-asgi
267
+ # opentelemetry-instrumentation-fastapi
268
+ # opentelemetry-sdk
269
+ opentelemetry-util-http==0.55b0
270
+ # via
271
+ # opentelemetry-instrumentation-asgi
272
+ # opentelemetry-instrumentation-fastapi
273
+ orjson==3.10.18
274
+ # via langsmith
275
+ overrides==7.7.0
276
+ # via chromadb
277
+ packaging==23.2
278
+ # via
279
+ # build
280
+ # huggingface-hub
281
+ # langchain-core
282
+ # marshmallow
283
+ # onnxruntime
284
+ # opentelemetry-instrumentation
285
+ # transformers
286
+ passlib==1.7.4
287
+ # via invoice-reimbursement-system (pyproject.toml)
288
+ pillow==11.2.1
289
+ # via sentence-transformers
290
+ posthog==4.2.0
291
+ # via chromadb
292
+ propcache==0.3.1
293
+ # via
294
+ # aiohttp
295
+ # yarl
296
+ protobuf==5.29.5
297
+ # via
298
+ # googleapis-common-protos
299
+ # onnxruntime
300
+ # opentelemetry-proto
301
+ pulsar-client==3.7.0
302
+ # via chromadb
303
+ pyasn1==0.6.1
304
+ # via
305
+ # pyasn1-modules
306
+ # python-jose
307
+ # rsa
308
+ pyasn1-modules==0.4.2
309
+ # via google-auth
310
+ pycparser==2.22
311
+ # via cffi
312
+ pydantic==2.11.5
313
+ # via
314
+ # invoice-reimbursement-system (pyproject.toml)
315
+ # chromadb
316
+ # fastapi
317
+ # langchain
318
+ # langchain-core
319
+ # langsmith
320
+ # openai
321
+ # pydantic-settings
322
+ pydantic-core==2.33.2
323
+ # via pydantic
324
+ pydantic-settings==2.9.1
325
+ # via invoice-reimbursement-system (pyproject.toml)
326
+ pygments==2.19.1
327
+ # via rich
328
+ pypdf==4.0.1
329
+ # via invoice-reimbursement-system (pyproject.toml)
330
+ pypika==0.48.9
331
+ # via chromadb
332
+ pyproject-hooks==1.2.0
333
+ # via build
334
+ python-dateutil==2.9.0.post0
335
+ # via
336
+ # kubernetes
337
+ # posthog
338
+ python-dotenv==1.0.1
339
+ # via
340
+ # invoice-reimbursement-system (pyproject.toml)
341
+ # pydantic-settings
342
+ # uvicorn
343
+ python-jose==3.3.0
344
+ # via invoice-reimbursement-system (pyproject.toml)
345
+ python-multipart==0.0.9
346
+ # via invoice-reimbursement-system (pyproject.toml)
347
+ pyyaml==6.0.2
348
+ # via
349
+ # chromadb
350
+ # huggingface-hub
351
+ # kubernetes
352
+ # langchain
353
+ # langchain-community
354
+ # langchain-core
355
+ # transformers
356
+ # uvicorn
357
+ regex==2024.11.6
358
+ # via
359
+ # tiktoken
360
+ # transformers
361
+ requests==2.32.3
362
+ # via
363
+ # chromadb
364
+ # huggingface-hub
365
+ # kubernetes
366
+ # langchain
367
+ # langchain-community
368
+ # langsmith
369
+ # posthog
370
+ # requests-oauthlib
371
+ # requests-toolbelt
372
+ # tiktoken
373
+ # transformers
374
+ requests-oauthlib==2.0.0
375
+ # via kubernetes
376
+ requests-toolbelt==1.0.0
377
+ # via langsmith
378
+ rich==14.0.0
379
+ # via typer
380
+ rsa==4.9.1
381
+ # via
382
+ # google-auth
383
+ # python-jose
384
+ safetensors==0.5.3
385
+ # via transformers
386
+ scikit-learn==1.7.0
387
+ # via sentence-transformers
388
+ scipy==1.15.3
389
+ # via
390
+ # scikit-learn
391
+ # sentence-transformers
392
+ sentence-transformers==2.5.1
393
+ # via invoice-reimbursement-system (pyproject.toml)
394
+ setuptools==80.9.0
395
+ # via
396
+ # torch
397
+ # triton
398
+ shellingham==1.5.4
399
+ # via typer
400
+ six==1.17.0
401
+ # via
402
+ # ecdsa
403
+ # kubernetes
404
+ # posthog
405
+ # python-dateutil
406
+ sniffio==1.3.1
407
+ # via
408
+ # anyio
409
+ # openai
410
+ sqlalchemy==2.0.41
411
+ # via
412
+ # langchain
413
+ # langchain-community
414
+ starlette==0.36.3
415
+ # via fastapi
416
+ sympy==1.14.0
417
+ # via
418
+ # onnxruntime
419
+ # torch
420
+ tenacity==8.5.0
421
+ # via
422
+ # chromadb
423
+ # langchain
424
+ # langchain-community
425
+ # langchain-core
426
+ threadpoolctl==3.6.0
427
+ # via scikit-learn
428
+ tiktoken==0.9.0
429
+ # via langchain-openai
430
+ tokenizers==0.21.1
431
+ # via
432
+ # chromadb
433
+ # transformers
434
+ torch==2.7.1
435
+ # via sentence-transformers
436
+ tqdm==4.67.1
437
+ # via
438
+ # chromadb
439
+ # huggingface-hub
440
+ # openai
441
+ # sentence-transformers
442
+ # transformers
443
+ transformers==4.52.4
444
+ # via sentence-transformers
445
+ triton==3.3.1
446
+ # via torch
447
+ typer==0.16.0
448
+ # via chromadb
449
+ typing-extensions==4.14.0
450
+ # via
451
+ # chromadb
452
+ # fastapi
453
+ # huggingface-hub
454
+ # openai
455
+ # opentelemetry-api
456
+ # opentelemetry-exporter-otlp-proto-grpc
457
+ # opentelemetry-sdk
458
+ # opentelemetry-semantic-conventions
459
+ # pydantic
460
+ # pydantic-core
461
+ # sqlalchemy
462
+ # torch
463
+ # typer
464
+ # typing-inspect
465
+ # typing-inspection
466
+ typing-inspect==0.9.0
467
+ # via dataclasses-json
468
+ typing-inspection==0.4.1
469
+ # via
470
+ # pydantic
471
+ # pydantic-settings
472
+ urllib3==2.4.0
473
+ # via
474
+ # kubernetes
475
+ # requests
476
+ uvicorn==0.27.1
477
+ # via
478
+ # invoice-reimbursement-system (pyproject.toml)
479
+ # chromadb
480
+ uvloop==0.21.0
481
+ # via uvicorn
482
+ watchfiles==1.0.5
483
+ # via uvicorn
484
+ websocket-client==1.8.0
485
+ # via kubernetes
486
+ websockets==15.0.1
487
+ # via uvicorn
488
+ wrapt==1.17.2
489
+ # via opentelemetry-instrumentation
490
+ yarl==1.20.0
491
+ # via aiohttp
492
+ zipp==3.22.0
493
+ # via importlib-metadata
494
+ pytest
495
+ reportlab
uv.lock ADDED
The diff for this file is too large to render. See raw diff