Upload 8 files
Browse files- .gitattributes +6 -0
- Notices.pdf +3 -0
- app.py +1029 -0
- circular-11-2025.pdf +3 -0
- circular-15-2025.pdf +3 -0
- circular-no-14-2025.pdf +3 -0
- income-tax-act-1961-as-amended-by-finance-act-2025.pdf +3 -0
- requirements.txt +29 -0
- waiver-of-interest-circular-no-13-2025.pdf +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
circular-11-2025.pdf filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
circular-15-2025.pdf filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
circular-no-14-2025.pdf filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
income-tax-act-1961-as-amended-by-finance-act-2025.pdf filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
Notices.pdf filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
waiver-of-interest-circular-no-13-2025.pdf filter=lfs diff=lfs merge=lfs -text
|
Notices.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ea284c144b77486b1efb466c77112f2f8fccb2c9465e46a965ddc975207a316
|
| 3 |
+
size 118105
|
app.py
ADDED
|
@@ -0,0 +1,1029 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
╔══════════════════════════════════════════════════════════════════════╗
|
| 3 |
+
║ TaxBot AI — app.py ║
|
| 4 |
+
║ Multimodal Financial Co-pilot for Indian MSMEs & Taxpayers ║
|
| 5 |
+
║ Built for Hugging Face Spaces · Powered by Streamlit + LangChain ║
|
| 6 |
+
╚══════════════════════════════════════════════════════════════════════╝
|
| 7 |
+
|
| 8 |
+
ARCHITECTURE OVERVIEW (Triple-Engine Hybrid RAG):
|
| 9 |
+
Engine 1 — Knowledge Base (RAG): PDF → ChromaDB → Retrieval
|
| 10 |
+
Engine 2 — Generative Reasoning: Retrieved context → Claude → Answer
|
| 11 |
+
Engine 3 — Notice Interpreter: Image/PDF upload → GPT-4o Vision → Summary
|
| 12 |
+
|
| 13 |
+
DEPLOYMENT:
|
| 14 |
+
1. Upload this file + requirements.txt to a Hugging Face Space (Streamlit SDK).
|
| 15 |
+
2. Set secrets: ANTHROPIC_API_KEY, OPENAI_API_KEY in HF Space Settings.
|
| 16 |
+
3. ChromaDB runs in-memory (no external DB needed for the pilot).
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
# ─────────────────────────────────────────────
|
| 20 |
+
# SECTION 0: Imports & Page Configuration
|
| 21 |
+
# ─────────────────────────────────────────────
|
| 22 |
+
import os
|
| 23 |
+
import io
|
| 24 |
+
import base64
|
| 25 |
+
import tempfile
|
| 26 |
+
import streamlit as st
|
| 27 |
+
|
| 28 |
+
# LangChain — the orchestration backbone
|
| 29 |
+
from langchain_community.document_loaders import PyPDFLoader
|
| 30 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 31 |
+
from langchain_community.vectorstores import Chroma
|
| 32 |
+
from langchain_openai import OpenAIEmbeddings # Embeddings via OpenAI
|
| 33 |
+
from langchain_anthropic import ChatAnthropic # LLM via Anthropic Claude
|
| 34 |
+
from langchain.chains import RetrievalQA
|
| 35 |
+
from langchain.prompts import PromptTemplate
|
| 36 |
+
|
| 37 |
+
# OpenAI Vision (for Notice Interpreter)
|
| 38 |
+
from openai import OpenAI
|
| 39 |
+
|
| 40 |
+
# ── Streamlit Page Setup (must be FIRST Streamlit call) ──────────────
|
| 41 |
+
st.set_page_config(
|
| 42 |
+
page_title="TaxBot AI · Indian Tax Co-pilot",
|
| 43 |
+
page_icon="⚖️",
|
| 44 |
+
layout="wide",
|
| 45 |
+
initial_sidebar_state="expanded",
|
| 46 |
+
)
|
| 47 |
+
|
| 48 |
+
# ─────────────────────────────────────────────
|
| 49 |
+
# SECTION 1: Custom CSS — Visual Identity
|
| 50 |
+
# ─────────────────────────────────────────────
|
| 51 |
+
# Design Direction: "Legal Ink on Digital Paper"
|
| 52 |
+
# Crisp deep navy + saffron accent. Monospaced touches for the "statutory" feel.
|
| 53 |
+
# Evokes trust, authority, and precision — appropriate for a compliance tool.
|
| 54 |
+
|
| 55 |
+
st.markdown("""
|
| 56 |
+
<style>
|
| 57 |
+
@import url('https://fonts.googleapis.com/css2?family=DM+Serif+Display:ital@0;1&family=JetBrains+Mono:wght@400;600&family=DM+Sans:wght@400;500;600&display=swap');
|
| 58 |
+
|
| 59 |
+
/* ── Root Variables ── */
|
| 60 |
+
:root {
|
| 61 |
+
--navy: #0d1b2a;
|
| 62 |
+
--saffron: #e8851a;
|
| 63 |
+
--cream: #f5f0e8;
|
| 64 |
+
--teal: #1a7a6e;
|
| 65 |
+
--red: #c0392b;
|
| 66 |
+
--text: #1a1a2e;
|
| 67 |
+
--muted: #6b7280;
|
| 68 |
+
--border: #d4c9b0;
|
| 69 |
+
--card-bg: #fdfaf5;
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
/* ── Global Reset ── */
|
| 73 |
+
html, body, [class*="css"] {
|
| 74 |
+
font-family: 'DM Sans', sans-serif;
|
| 75 |
+
background-color: var(--cream) !important;
|
| 76 |
+
color: var(--text);
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
/* ── Sidebar ── */
|
| 80 |
+
[data-testid="stSidebar"] {
|
| 81 |
+
background: var(--navy) !important;
|
| 82 |
+
border-right: 3px solid var(--saffron);
|
| 83 |
+
}
|
| 84 |
+
[data-testid="stSidebar"] * { color: var(--cream) !important; }
|
| 85 |
+
[data-testid="stSidebar"] h1,
|
| 86 |
+
[data-testid="stSidebar"] h2,
|
| 87 |
+
[data-testid="stSidebar"] h3 { color: var(--saffron) !important; }
|
| 88 |
+
[data-testid="stSidebar"] .stButton > button {
|
| 89 |
+
background: var(--saffron) !important;
|
| 90 |
+
color: var(--navy) !important;
|
| 91 |
+
font-weight: 700 !important;
|
| 92 |
+
border: none !important;
|
| 93 |
+
border-radius: 4px !important;
|
| 94 |
+
width: 100% !important;
|
| 95 |
+
font-family: 'JetBrains Mono', monospace !important;
|
| 96 |
+
letter-spacing: 0.05em;
|
| 97 |
+
}
|
| 98 |
+
[data-testid="stSidebar"] .stButton > button:hover {
|
| 99 |
+
background: #f0972a !important;
|
| 100 |
+
transform: translateY(-1px);
|
| 101 |
+
box-shadow: 0 4px 12px rgba(232,133,26,0.4) !important;
|
| 102 |
+
}
|
| 103 |
+
|
| 104 |
+
/* ── Main Header ── */
|
| 105 |
+
.taxbot-header {
|
| 106 |
+
display: flex;
|
| 107 |
+
align-items: center;
|
| 108 |
+
gap: 1rem;
|
| 109 |
+
padding: 1.5rem 0 0.5rem;
|
| 110 |
+
border-bottom: 2px solid var(--saffron);
|
| 111 |
+
margin-bottom: 1.5rem;
|
| 112 |
+
}
|
| 113 |
+
.taxbot-header h1 {
|
| 114 |
+
font-family: 'DM Serif Display', serif;
|
| 115 |
+
font-size: 2.4rem;
|
| 116 |
+
color: var(--navy);
|
| 117 |
+
margin: 0;
|
| 118 |
+
letter-spacing: -0.03em;
|
| 119 |
+
}
|
| 120 |
+
.taxbot-header .badge {
|
| 121 |
+
background: var(--saffron);
|
| 122 |
+
color: var(--navy);
|
| 123 |
+
font-family: 'JetBrains Mono', monospace;
|
| 124 |
+
font-size: 0.65rem;
|
| 125 |
+
font-weight: 700;
|
| 126 |
+
padding: 3px 10px;
|
| 127 |
+
border-radius: 2px;
|
| 128 |
+
letter-spacing: 0.12em;
|
| 129 |
+
text-transform: uppercase;
|
| 130 |
+
align-self: flex-start;
|
| 131 |
+
margin-top: 0.6rem;
|
| 132 |
+
}
|
| 133 |
+
.taxbot-subtitle {
|
| 134 |
+
color: var(--muted);
|
| 135 |
+
font-size: 0.95rem;
|
| 136 |
+
margin-bottom: 1.5rem;
|
| 137 |
+
}
|
| 138 |
+
|
| 139 |
+
/* ── Chat Messages ── */
|
| 140 |
+
.chat-user {
|
| 141 |
+
background: var(--navy);
|
| 142 |
+
color: var(--cream);
|
| 143 |
+
border-radius: 12px 12px 2px 12px;
|
| 144 |
+
padding: 1rem 1.25rem;
|
| 145 |
+
margin: 0.75rem 0 0.75rem 3rem;
|
| 146 |
+
font-size: 0.95rem;
|
| 147 |
+
line-height: 1.6;
|
| 148 |
+
box-shadow: 0 2px 8px rgba(13,27,42,0.15);
|
| 149 |
+
}
|
| 150 |
+
.chat-bot {
|
| 151 |
+
background: var(--card-bg);
|
| 152 |
+
border: 1px solid var(--border);
|
| 153 |
+
border-left: 4px solid var(--teal);
|
| 154 |
+
border-radius: 2px 12px 12px 12px;
|
| 155 |
+
padding: 1rem 1.25rem;
|
| 156 |
+
margin: 0.75rem 3rem 0.75rem 0;
|
| 157 |
+
font-size: 0.95rem;
|
| 158 |
+
line-height: 1.7;
|
| 159 |
+
box-shadow: 0 2px 8px rgba(0,0,0,0.06);
|
| 160 |
+
}
|
| 161 |
+
.chat-bot .source-tag {
|
| 162 |
+
font-family: 'JetBrains Mono', monospace;
|
| 163 |
+
font-size: 0.7rem;
|
| 164 |
+
color: var(--teal);
|
| 165 |
+
background: rgba(26,122,110,0.1);
|
| 166 |
+
padding: 2px 8px;
|
| 167 |
+
border-radius: 3px;
|
| 168 |
+
display: inline-block;
|
| 169 |
+
margin-top: 0.75rem;
|
| 170 |
+
margin-right: 0.4rem;
|
| 171 |
+
}
|
| 172 |
+
|
| 173 |
+
/* ── Notice Summary Card ── */
|
| 174 |
+
.notice-card {
|
| 175 |
+
background: #fff8f0;
|
| 176 |
+
border: 1.5px solid var(--saffron);
|
| 177 |
+
border-radius: 8px;
|
| 178 |
+
padding: 1.25rem;
|
| 179 |
+
margin: 1rem 0;
|
| 180 |
+
}
|
| 181 |
+
.notice-card h4 {
|
| 182 |
+
font-family: 'DM Serif Display', serif;
|
| 183 |
+
color: var(--navy);
|
| 184 |
+
margin: 0 0 0.5rem;
|
| 185 |
+
font-size: 1.1rem;
|
| 186 |
+
}
|
| 187 |
+
.notice-card .deadline {
|
| 188 |
+
background: var(--red);
|
| 189 |
+
color: white;
|
| 190 |
+
font-family: 'JetBrains Mono', monospace;
|
| 191 |
+
font-size: 0.75rem;
|
| 192 |
+
font-weight: 600;
|
| 193 |
+
padding: 3px 10px;
|
| 194 |
+
border-radius: 3px;
|
| 195 |
+
display: inline-block;
|
| 196 |
+
margin-top: 0.5rem;
|
| 197 |
+
}
|
| 198 |
+
|
| 199 |
+
/* ── Status Pills ── */
|
| 200 |
+
.status-ok { background:#d4edda; color:#1a5e31; padding:3px 10px; border-radius:12px; font-size:0.8rem; font-weight:600; }
|
| 201 |
+
.status-warn { background:#fff3cd; color:#856404; padding:3px 10px; border-radius:12px; font-size:0.8rem; font-weight:600; }
|
| 202 |
+
.status-err { background:#f8d7da; color:#721c24; padding:3px 10px; border-radius:12px; font-size:0.8rem; font-weight:600; }
|
| 203 |
+
|
| 204 |
+
/* ── Input Area ── */
|
| 205 |
+
.stTextInput > div > div > input,
|
| 206 |
+
.stTextArea > div > div > textarea {
|
| 207 |
+
border: 1.5px solid var(--border) !important;
|
| 208 |
+
border-radius: 6px !important;
|
| 209 |
+
font-family: 'DM Sans', sans-serif !important;
|
| 210 |
+
background: white !important;
|
| 211 |
+
}
|
| 212 |
+
.stTextInput > div > div > input:focus,
|
| 213 |
+
.stTextArea > div > div > textarea:focus {
|
| 214 |
+
border-color: var(--teal) !important;
|
| 215 |
+
box-shadow: 0 0 0 3px rgba(26,122,110,0.15) !important;
|
| 216 |
+
}
|
| 217 |
+
.stButton > button {
|
| 218 |
+
background: var(--teal) !important;
|
| 219 |
+
color: white !important;
|
| 220 |
+
border: none !important;
|
| 221 |
+
border-radius: 6px !important;
|
| 222 |
+
font-weight: 600 !important;
|
| 223 |
+
padding: 0.5rem 1.5rem !important;
|
| 224 |
+
}
|
| 225 |
+
.stButton > button:hover {
|
| 226 |
+
background: #155f55 !important;
|
| 227 |
+
transform: translateY(-1px);
|
| 228 |
+
box-shadow: 0 4px 12px rgba(26,122,110,0.3) !important;
|
| 229 |
+
}
|
| 230 |
+
|
| 231 |
+
/* ── Tabs ── */
|
| 232 |
+
.stTabs [data-baseweb="tab-list"] { border-bottom: 2px solid var(--border); }
|
| 233 |
+
.stTabs [data-baseweb="tab"] {
|
| 234 |
+
font-family: 'JetBrains Mono', monospace;
|
| 235 |
+
font-size: 0.8rem;
|
| 236 |
+
letter-spacing: 0.08em;
|
| 237 |
+
color: var(--muted) !important;
|
| 238 |
+
}
|
| 239 |
+
.stTabs [aria-selected="true"] {
|
| 240 |
+
color: var(--navy) !important;
|
| 241 |
+
border-bottom: 2px solid var(--saffron) !important;
|
| 242 |
+
}
|
| 243 |
+
|
| 244 |
+
/* ── Divider ── */
|
| 245 |
+
hr { border-color: var(--border) !important; }
|
| 246 |
+
</style>
|
| 247 |
+
""", unsafe_allow_html=True)
|
| 248 |
+
|
| 249 |
+
|
| 250 |
+
# ─────────────────────────────────────────────
|
| 251 |
+
# SECTION 2: API Client Initialisation
|
| 252 |
+
# ─────────────────────────────────────────────
|
| 253 |
+
# Business Purpose: Securely load API keys from Hugging Face Secrets
|
| 254 |
+
# (or .env locally). Never hard-code keys in source code.
|
| 255 |
+
|
| 256 |
+
@st.cache_resource
def get_llm():
    """Build (once per process) the Claude model that powers Engine 2.

    Returns:
        A LangChain ``ChatAnthropic`` instance, or ``None`` when the
        ANTHROPIC_API_KEY environment variable is not set.
    """
    key = os.environ.get("ANTHROPIC_API_KEY", "")
    if not key:
        st.warning("⚠️ ANTHROPIC_API_KEY not set. Reasoning engine offline.", icon="⚠️")
        return None
    # Low temperature keeps statutory answers deterministic; 1500 tokens is
    # enough headroom for a structured compliance response.
    return ChatAnthropic(
        model="claude-sonnet-4-5",
        api_key=key,
        temperature=0.1,
        max_tokens=1500,
    )
|
| 272 |
+
|
| 273 |
+
@st.cache_resource
def get_openai_client():
    """Return the OpenAI client used solely for GPT-4o Vision notice parsing.

    Returns:
        An ``OpenAI`` client, or ``None`` when OPENAI_API_KEY is not set.
    """
    key = os.environ.get("OPENAI_API_KEY", "")
    return OpenAI(api_key=key) if key else None
|
| 283 |
+
|
| 284 |
+
@st.cache_resource
def get_embeddings():
    """Return the embedding model that vectorises text chunks for ChromaDB.

    Returns:
        An ``OpenAIEmbeddings`` instance, or ``None`` when OPENAI_API_KEY
        is not set.
    """
    key = os.environ.get("OPENAI_API_KEY", "")
    if not key:
        return None
    # text-embedding-3-small: good retrieval quality at low cost.
    return OpenAIEmbeddings(model="text-embedding-3-small", api_key=key)
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
# ─────────────────────────────────────────────
|
| 300 |
+
# SECTION 3: Session State Initialisation
|
| 301 |
+
# ─────────────────────────────────────────────
|
| 302 |
+
# Think of session_state as the app's short-term memory per user session.
|
| 303 |
+
|
| 304 |
+
def init_session_state():
    """Ensure every session_state key the app reads exists (idempotent).

    Existing entries are never overwritten, so reruns keep user state.
    """
    # key -> initial value for a brand-new session
    defaults = {
        "chat_history": [],     # list of {"role": "user"/"bot", "content": "..."}
        "vectorstore": None,    # ChromaDB instance (built when user uploads PDFs)
        "kb_doc_count": 0,      # number of chunks indexed
        "kb_file_names": [],    # names of uploaded files, for display
        "notice_result": None,  # last parsed notice result
    }
    for key in defaults:
        if key not in st.session_state:
            st.session_state[key] = defaults[key]

init_session_state()
|
| 317 |
+
|
| 318 |
+
|
| 319 |
+
# ─────────────────────────────────────────────
|
| 320 |
+
# SECTION 3b: AUTO-PRELOAD on Startup
|
| 321 |
+
# ─────────────────────────────────────────────
|
| 322 |
+
# Business Purpose: When deployed on Hugging Face Spaces, this block
|
| 323 |
+
# runs ONCE per session and loads all PDFs from the 'docs/' folder
|
| 324 |
+
# automatically. Judges see a fully-ready Knowledge Base on first load.
|
| 325 |
+
# The @st.cache_resource on embeddings/LLM ensures this is efficient.
|
| 326 |
+
|
| 327 |
+
@st.cache_resource(show_spinner=False)
def autoload_knowledge_base():
    """Index every PDF under docs/ into ChromaDB (cached: one run per app instance).

    Unreadable PDFs are skipped silently — this is a best-effort preload so
    the app still boots if one document is corrupt.

    Returns:
        (vectorstore, chunk_count, file_names), or (None, 0, []) when the
        embeddings client, the docs folder, or any loadable page is missing.
    """
    empty = (None, 0, [])
    docs_folder = "docs"  # relative path — matches the HF Space folder layout

    embeddings = get_embeddings()
    if embeddings is None:
        return empty
    if not os.path.exists(docs_folder):
        return empty

    pdf_names = [name for name in os.listdir(docs_folder) if name.lower().endswith(".pdf")]
    if not pdf_names:
        return empty

    pages = []
    for name in pdf_names:
        try:
            loaded = PyPDFLoader(os.path.join(docs_folder, name)).load()
            for page in loaded:
                page.metadata["source"] = name  # tag for citation display
            pages.extend(loaded)
        except Exception:
            # Best-effort preload: a bad PDF must not block startup.
            continue

    if not pages:
        return empty

    chunks = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=150,
        separators=["\n\n", "\n", ".", " "],
    ).split_documents(pages)

    store = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        collection_name="taxbot_preloaded_kb",
    )
    return store, len(chunks), pdf_names
|
| 372 |
+
|
| 373 |
+
|
| 374 |
+
# Run the auto-preload and populate session state (only if KB not already set)
|
| 375 |
+
# Run the auto-preload and populate session state (only if KB not already set).
# autoload_knowledge_base() is wrapped in @st.cache_resource, so the expensive
# PDF ingestion runs once per app instance; later sessions just copy the
# cached handles into their own session_state.
if st.session_state["vectorstore"] is None:
    _vs, _count, _files = autoload_knowledge_base()
    if _vs is not None:
        st.session_state["vectorstore"] = _vs
        st.session_state["kb_doc_count"] = _count
        st.session_state["kb_file_names"] = _files
        st.session_state["kb_preloaded"] = True  # Flag to show "Pre-loaded" badge
|
| 382 |
+
|
| 383 |
+
|
| 384 |
+
# ─────────────────────────────────────────────
|
| 385 |
+
# SECTION 4: ENGINE 1 — Knowledge Base Builder
|
| 386 |
+
# ─────────────────────────────────────────────
|
| 387 |
+
# Business Logic:
|
| 388 |
+
# User uploads PDF circulars/acts → we split them into manageable chunks →
|
| 389 |
+
# embed each chunk as a vector → store in ChromaDB.
|
| 390 |
+
# Later, when a user asks a question, we retrieve the top-K most relevant
|
| 391 |
+
# chunks as "context" for Claude (this is RAG).
|
| 392 |
+
|
| 393 |
+
def build_knowledge_base(uploaded_files: list) -> tuple[Chroma | None, int]:
    """
    Ingests a list of uploaded PDF files into a ChromaDB vector store.

    Pipeline: PDF → pages → 800-char chunks (150 overlap) → embeddings → Chroma.
    A PDF that fails to parse is skipped with a warning rather than aborting
    the whole build, matching build_knowledge_base_from_folder's behaviour.

    Args:
        uploaded_files: List of Streamlit UploadedFile objects.

    Returns:
        (vectorstore, chunk_count) — the ChromaDB instance and total chunks
        indexed, or (None, 0) when nothing usable could be ingested.
    """
    embeddings = get_embeddings()
    if embeddings is None:
        st.error("OpenAI API key required for building the Knowledge Base.")
        return None, 0

    all_pages = []

    # ── Step 1: Load and parse each PDF ──────────────────────────────
    with st.spinner("📄 Reading and parsing PDFs..."):
        for uploaded_file in uploaded_files:
            # Save to temp file (PyPDFLoader requires a file path)
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
                tmp.write(uploaded_file.getvalue())
                tmp_path = tmp.name
            try:
                raw_pages = PyPDFLoader(tmp_path).load()
                # Add source metadata to each page for citation tracking
                for page in raw_pages:
                    page.metadata["source"] = uploaded_file.name
                all_pages.extend(raw_pages)
            except Exception as exc:
                # Fix: one corrupt PDF no longer aborts the entire build.
                st.warning(f"Could not load {uploaded_file.name}: {exc}")
            finally:
                os.unlink(tmp_path)  # Fix: temp file removed even when parsing fails

    if not all_pages:
        # Nothing parseable — avoid embedding an empty document set.
        return None, 0

    # ── Step 2: Chunk the text ────────────────────────────────────────
    # Why chunk? LLMs have context limits. Smaller chunks = more precise retrieval.
    # chunk_size=800 chars ≈ ~200 tokens. Overlap=150 prevents context loss at edges.
    with st.spinner("✂️ Chunking documents into retrievable segments..."):
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=150,
            separators=["\n\n", "\n", ".", " "],  # Prefer splitting at paragraphs
        )
        chunks = splitter.split_documents(all_pages)

    # ── Step 3: Embed and store in ChromaDB ──────────────────────────
    with st.spinner(f"🧠 Embedding {len(chunks)} chunks into vector database..."):
        vectorstore = Chroma.from_documents(
            documents=chunks,
            embedding=embeddings,
            collection_name="taxbot_knowledge_base",
        )

    return vectorstore, len(chunks)
|
| 448 |
+
|
| 449 |
+
|
| 450 |
+
def build_knowledge_base_from_folder(folder_path: str = "docs") -> tuple:
    """
    Build the Knowledge Base from every PDF in a local folder (startup preload).

    Business purpose: on Hugging Face Spaces the app boots with all CBDT
    circulars and the Finance Act already indexed, so users (and judges) can
    ask questions immediately with no manual upload.

    Args:
        folder_path: Folder holding the pre-loaded PDFs — the 'docs/' folder
            in the Space repo.

    Returns:
        (vectorstore, chunk_count, file_names), or (None, 0, []) when the
        embeddings client, the folder, or any loadable PDF is missing.
    """
    nothing = (None, 0, [])

    embeddings = get_embeddings()
    if embeddings is None:
        return nothing

    # Bail out early when the docs folder is absent.
    if not os.path.exists(folder_path):
        return nothing

    pdf_names = [n for n in os.listdir(folder_path) if n.lower().endswith(".pdf")]
    if not pdf_names:
        return nothing

    pages = []
    # Load each PDF straight from disk (no temp files — we have real paths).
    for name in pdf_names:
        try:
            doc_pages = PyPDFLoader(os.path.join(folder_path, name)).load()
            # Tag each page with its source filename for citations.
            for page in doc_pages:
                page.metadata["source"] = name
            pages.extend(doc_pages)
        except Exception as e:
            st.warning(f"Could not load {name}: {e}")

    if not pages:
        return nothing

    # Chunk, then embed into ChromaDB.
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=150,
        separators=["\n\n", "\n", ".", " "],
    ).split_documents(pages)

    store = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        collection_name="taxbot_knowledge_base",
    )
    return store, len(chunks), pdf_names
|
| 518 |
+
|
| 519 |
+
|
| 520 |
+
# ─────────────────────────────────────────────
|
| 521 |
+
# SECTION 5: ENGINE 2 — Generative Reasoning Chain
|
| 522 |
+
# ─────────────────────────────────────────────
|
| 523 |
+
# Business Logic:
|
| 524 |
+
# When a user asks a tax question, we:
|
| 525 |
+
# (a) Retrieve top-4 most relevant document chunks from ChromaDB.
|
| 526 |
+
# (b) Feed those chunks + the user's question into Claude via a
|
| 527 |
+
# carefully crafted prompt that enforces statutory accuracy.
|
| 528 |
+
|
| 529 |
+
# ── The System Prompt — this is the "personality" of TaxBot AI ───────
|
| 530 |
+
# Contract Claude must follow for every answer: stay grounded in the retrieved
# excerpts, refuse to invent citations, and end with an actionable step.
TAXBOT_PROMPT_TEMPLATE = """
You are TaxBot AI, an expert Indian tax compliance assistant for MSMEs and individual taxpayers.
Your answers must be:
1. GROUNDED: Only use information from the provided context (retrieved statutory excerpts).
2. PLAIN-LANGUAGE: Explain complex legal provisions in simple business terms.
3. STRUCTURED: Use bullet points and section references where helpful.
4. HONEST: If the context does not contain enough information, say so clearly.
Never fabricate section numbers or circular references.
5. ACTIONABLE: End with a clear "What you should do" recommendation.

RETRIEVED STATUTORY CONTEXT:
──────────────────────────────
{context}
──────────────────────────────

USER QUESTION: {question}

TAXBOT AI RESPONSE:
"""

# PromptTemplate binds the two placeholders the RetrievalQA chain fills in:
# {context} (retrieved chunks joined together) and {question} (the user query).
TAXBOT_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=TAXBOT_PROMPT_TEMPLATE
)
|
| 554 |
+
|
| 555 |
+
|
| 556 |
+
def get_tax_answer(question: str, vectorstore: Chroma, k: int = 4) -> dict:
    """
    Run the RAG pipeline: retrieve relevant law chunks, then ask Claude.

    Args:
        question: The user's tax query string.
        vectorstore: The populated ChromaDB instance.
        k: Number of top-matching chunks to retrieve. Defaults to 4, which
           preserves the previous hard-coded behaviour, so existing callers
           are unaffected. (The sidebar's "Chunks to retrieve (k)" slider can
           now be wired to this parameter.)

    Returns:
        dict with keys:
            "answer"  (str): the generated response text.
            "sources" (list[str]): sorted, de-duplicated source filenames of
                the retrieved chunks, for citation display.
    """
    llm = get_llm()
    if llm is None:
        return {"answer": "⚠️ LLM not configured. Please set ANTHROPIC_API_KEY.", "sources": []}

    # Build a RetrievalQA chain with our custom prompt.
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",  # "stuff" = all retrieved chunks stuffed into one prompt
        retriever=vectorstore.as_retriever(
            search_type="similarity",
            search_kwargs={"k": k},  # generalized: was hard-coded to 4
        ),
        chain_type_kwargs={"prompt": TAXBOT_PROMPT},
        return_source_documents=True,  # needed so we can show citations
    )

    result = qa_chain.invoke({"query": question})

    # Unique source file names for citation display. sorted() makes the order
    # deterministic (iterating a raw set gave arbitrary ordering across runs).
    sources = sorted({
        doc.metadata.get("source", "Unknown")
        for doc in result.get("source_documents", [])
    })

    return {
        "answer": result.get("result", "No answer generated."),
        "sources": sources,
    }
|
| 595 |
+
|
| 596 |
+
|
| 597 |
+
# ─────────────────────────────────────────────
|
| 598 |
+
# SECTION 6: ENGINE 3 — Notice Interpreter (Vision AI)
|
| 599 |
+
# ─────────────────────────────────────────────
|
| 600 |
+
# Business Logic:
|
| 601 |
+
# User uploads a scanned tax notice image or PDF.
|
| 602 |
+
# We encode it as base64 and send to GPT-4o Vision.
|
| 603 |
+
# The model extracts key details: notice type, demands, deadlines, required action.
|
| 604 |
+
|
| 605 |
+
NOTICE_SYSTEM_PROMPT = """
|
| 606 |
+
You are an expert Indian tax notice analyst. When given a tax notice image,
|
| 607 |
+
extract and summarize the following in a structured JSON format:
|
| 608 |
+
|
| 609 |
+
{
|
| 610 |
+
"notice_type": "e.g., Section 143(1) Intimation / GST ASMT-10 / etc.",
|
| 611 |
+
"assessment_year": "AY 20XX-XX",
|
| 612 |
+
"taxpayer_pan": "PAN or GSTIN if visible",
|
| 613 |
+
"key_discrepancy": "Plain-language description of what the department found",
|
| 614 |
+
"amount_involved": "₹ amount of demand or refund",
|
| 615 |
+
"deadline": "Date by which taxpayer must respond or pay",
|
| 616 |
+
"required_action": "Specific steps the taxpayer must take",
|
| 617 |
+
"severity": "LOW / MEDIUM / HIGH",
|
| 618 |
+
"severity_reason": "Brief reason for severity classification"
|
| 619 |
+
}
|
| 620 |
+
|
| 621 |
+
If any field is not visible in the notice, set it to "Not specified".
|
| 622 |
+
"""
|
| 623 |
+
|
| 624 |
+
|
| 625 |
+
def parse_tax_notice(image_bytes: bytes, file_type: str = "image/jpeg") -> dict:
    """
    Send a notice image to GPT-4o Vision and return a structured summary.

    Args:
        image_bytes: Raw bytes of the uploaded notice image.
        file_type: MIME type of the image (image/jpeg, image/png, etc.)

    Returns:
        dict with the extracted notice fields. On failure the dict contains
        either an "error" key (engine offline, or the API call raised) or a
        "raw_response" key (model reply was not valid JSON). The previous
        ``dict | None`` annotation was inaccurate — no code path returned None.
    """
    import json

    client = get_openai_client()
    if client is None:
        return {"error": "OpenAI API key not configured. Vision engine offline."}

    # Encode image to base64 for the Vision API.
    b64_image = base64.b64encode(image_bytes).decode("utf-8")

    with st.spinner("🔍 Analysing notice with Vision AI..."):
        try:
            response = client.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {
                        "role": "system",
                        "content": NOTICE_SYSTEM_PROMPT
                    },
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:{file_type};base64,{b64_image}",
                                    "detail": "high"  # High detail for text-heavy docs
                                }
                            },
                            {
                                "type": "text",
                                "text": "Please analyse this Indian tax notice and extract the structured information as specified."
                            }
                        ]
                    }
                ],
                max_tokens=1000,
                temperature=0.0,  # Zero temp for factual extraction
            )
        except Exception as exc:
            # ROBUSTNESS FIX: network/auth/rate-limit errors previously
            # propagated and crashed the Streamlit run. Surface them the same
            # way as the missing-key case — the caller already renders any
            # "error" key via st.error().
            return {"error": f"Vision analysis failed: {exc}"}

    raw_text = response.choices[0].message.content.strip()

    # Strip markdown code fences if present (GPT sometimes wraps JSON).
    raw_text = raw_text.replace("```json", "").replace("```", "").strip()

    try:
        return json.loads(raw_text)
    except json.JSONDecodeError:
        # If JSON parsing fails, return the raw text in a wrapper so the UI
        # can still show something useful.
        return {"notice_type": "Parsed (raw)", "raw_response": raw_text}
|
| 684 |
+
|
| 685 |
+
|
| 686 |
+
# ─────────────────────────────────────────────
|
| 687 |
+
# SECTION 7: UI LAYOUT — Sidebar
|
| 688 |
+
# ─────────────────────────────────────────────
|
| 689 |
+
|
| 690 |
+
with st.sidebar:
    st.markdown("## ⚖️ TaxBot AI")
    st.markdown("*Your Indian Tax Co-pilot*")
    st.markdown("---")

    # ── Knowledge Base Builder ────────────────────────────────────────
    st.markdown("### 📚 Knowledge Base")
    st.caption("Upload CBDT Circulars, Finance Acts, GST notifications, or any tax PDF.")

    uploaded_pdfs = st.file_uploader(
        "Upload Tax Documents (PDF)",
        type=["pdf"],
        accept_multiple_files=True,
        help="These will be ingested into ChromaDB to power the RAG engine."
    )

    if st.button("⚡ Build Knowledge Base", key="build_kb"):
        if not uploaded_pdfs:
            st.warning("Please upload at least one PDF first.")
        else:
            # BUG FIX: build_knowledge_base() returns a 3-tuple
            # (vectorstore, chunk_count, file_list). The previous 2-target
            # unpack raised "too many values to unpack" on every manual build.
            vectorstore, chunk_count, _kb_files = build_knowledge_base(uploaded_pdfs)
            if vectorstore:
                st.session_state["vectorstore"] = vectorstore
                st.session_state["kb_doc_count"] = chunk_count
                st.session_state["kb_file_names"] = [f.name for f in uploaded_pdfs]
                st.success(f"✅ Knowledge Base ready! {chunk_count} chunks indexed.")

    # Show current KB status (session_state keys are initialised earlier in
    # the file, before this sidebar runs).
    if st.session_state["vectorstore"]:
        is_preloaded = st.session_state.get("kb_preloaded", False)
        label = "● Pre-loaded KB Active" if is_preloaded else "● KB Active"
        st.markdown(
            f'<span class="status-ok">{label} — {st.session_state["kb_doc_count"]} chunks</span>',
            unsafe_allow_html=True
        )
        # NOTE(review): source listing is shown only for the pre-loaded KB;
        # confirm whether manually-built KBs should list sources too.
        if is_preloaded:
            st.caption("✅ Core tax documents loaded automatically.")
            st.caption("Sources:")
            for fname in st.session_state["kb_file_names"]:
                st.caption(f" • {fname}")
    else:
        st.markdown('<span class="status-warn">● KB Not Built</span>', unsafe_allow_html=True)
        st.caption("No docs/ folder found. Upload PDFs above to build manually.")

    st.markdown("---")

    # ── Settings ─────────────────────────────────────────────────────
    st.markdown("### ⚙️ Settings")

    show_sources = st.toggle("Show source citations", value=True)
    # NOTE(review): this slider is currently not passed to the retrieval call;
    # wire it to get_tax_answer's k parameter to make it effective.
    retrieval_k = st.slider("Chunks to retrieve (k)", min_value=2, max_value=8, value=4,
                            help="More chunks = broader context. May increase latency.")

    st.markdown("---")
    st.caption("Built for SIH1285 · Hackathon Demo")
    st.caption("Claude Sonnet 4.5 + GPT-4o Vision")
|
| 746 |
+
|
| 747 |
+
|
| 748 |
+
# ─────────────────────────────────────────────
|
| 749 |
+
# SECTION 8: UI LAYOUT — Main Panel
|
| 750 |
+
# ─────────────────────────────────────────────
|
| 751 |
+
|
| 752 |
+
# ── Header ────────────────────────────────────
|
| 753 |
+
# ── Header ────────────────────────────────
# Raw-HTML banner; the CSS classes (taxbot-header, badge, taxbot-subtitle)
# are defined in the style block earlier in the file.
st.markdown("""
<div class="taxbot-header">
    <h1>TaxBot AI</h1>
    <span class="badge">Beta · SIH1285</span>
</div>
<p class="taxbot-subtitle">
    Multimodal Financial Co-pilot · Statutory-accurate guidance for Indian MSMEs & Taxpayers
</p>
""", unsafe_allow_html=True)

# ── Three Tabs: Chat | Notice Interpreter | Audit Risk ───────────────
# Each tab's body is populated below under its own `with tab_...:` block.
tab_chat, tab_notice, tab_audit = st.tabs([
    "💬 Tax Advisory Chat",
    "📋 Notice Interpreter",
    "📊 Audit Risk Scanner"
])
|
| 769 |
+
|
| 770 |
+
|
| 771 |
+
# ══════════════════════════════════════════
|
| 772 |
+
# TAB 1: TAX ADVISORY CHAT
|
| 773 |
+
# ══════════════════════════════════════════
|
| 774 |
+
with tab_chat:
    import html  # stdlib; escape untrusted text before rendering it as raw HTML

    # ── Display chat history ──────────────────────────────────────────
    chat_container = st.container()
    with chat_container:
        if not st.session_state["chat_history"]:
            st.markdown("""
<div style="text-align:center; padding: 3rem 1rem; color: #9ca3af;">
    <div style="font-size: 2.5rem; margin-bottom: 1rem;">⚖️</div>
    <div style="font-family: 'DM Serif Display', serif; font-size: 1.2rem;
                color: #374151; margin-bottom: 0.5rem;">
        Ask me anything about Indian Tax Law
    </div>
    <div style="font-size: 0.9rem;">
        Upload PDFs to the Knowledge Base, then ask queries like:<br>
        <em>"What are the due dates under Circular 15/2025?"</em><br>
        <em>"Explain the 87A rebate changes under Finance Act 2025."</em><br>
        <em>"What is the penalty for late TDS payment?"</em>
    </div>
</div>
""", unsafe_allow_html=True)

        for msg in st.session_state["chat_history"]:
            if msg["role"] == "user":
                # SECURITY FIX: user-typed text is interpolated into raw HTML
                # (unsafe_allow_html=True). Escape it so typed markup/script
                # cannot be injected into the page.
                st.markdown(
                    f'<div class="chat-user">🧑 {html.escape(msg["content"])}</div>',
                    unsafe_allow_html=True
                )
            else:
                sources_html = ""
                if show_sources and msg.get("sources"):
                    for src in msg["sources"]:
                        # Source names derive from uploaded file names, so
                        # escape them as well.
                        sources_html += f'<span class="source-tag">📎 {html.escape(src)}</span>'

                # NOTE(review): the model answer is rendered unescaped so its
                # formatting survives; it is model output, not direct user input.
                st.markdown(
                    f'<div class="chat-bot">⚖️ {msg["content"]}{sources_html}</div>',
                    unsafe_allow_html=True
                )

    # ── Input row ────────────────────────────────────────────────────
    st.markdown("<br>", unsafe_allow_html=True)
    col_input, col_btn = st.columns([5, 1])

    with col_input:
        user_query = st.text_input(
            "Your tax question",
            placeholder="e.g. What is the deadline for filing ITR for AY 2025-26?",
            label_visibility="collapsed",
            key="chat_input"
        )
    with col_btn:
        send_clicked = st.button("Send →", key="send_btn")

    # ── Handle query submission ───────────────────────────────────────
    if send_clicked and user_query.strip():
        # Add user message to history
        st.session_state["chat_history"].append({
            "role": "user",
            "content": user_query
        })

        # Route to appropriate engine
        if st.session_state["vectorstore"] is None:
            # No KB built — use Claude without RAG (fallback mode)
            llm = get_llm()
            if llm:
                with st.spinner("🤔 Thinking (no Knowledge Base — using Claude's base knowledge)..."):
                    fallback_prompt = f"""
You are TaxBot AI, an expert Indian tax assistant. Answer the following question
based on your knowledge of Indian Income Tax Act 1961, GST laws, and CBDT circulars.
Be specific, structured, and cite relevant sections. End with actionable advice.

Question: {user_query}
"""
                    response = llm.invoke(fallback_prompt)
                    answer = response.content
            else:
                answer = "⚠️ Both the Knowledge Base and LLM are unavailable. Please check your API keys."

            st.session_state["chat_history"].append({
                "role": "bot",
                "content": answer,
                "sources": ["Claude base knowledge (no RAG)"]
            })

        else:
            # RAG mode — retrieve from ChromaDB, then reason with Claude
            with st.spinner("🔎 Searching knowledge base + reasoning..."):
                result = get_tax_answer(user_query, st.session_state["vectorstore"])

            st.session_state["chat_history"].append({
                "role": "bot",
                "content": result["answer"],
                "sources": result["sources"]
            })

        st.rerun()

    # ── Clear chat button ─────────────────────────────────────────────
    if st.session_state["chat_history"]:
        if st.button("🗑 Clear Chat", key="clear_chat"):
            st.session_state["chat_history"] = []
            st.rerun()
|
| 877 |
+
|
| 878 |
+
|
| 879 |
+
# ══════════════════════════════════════════
|
| 880 |
+
# TAB 2: NOTICE INTERPRETER
|
| 881 |
+
# ══════════════════════════════════════════
|
| 882 |
+
with tab_notice:
    import html  # stdlib; escape model-extracted text before raw-HTML render

    st.markdown("### 📋 Tax Notice Interpreter")
    st.markdown(
        "Upload a scanned or digital tax notice (image or PDF screenshot). "
        "The Vision AI engine will extract key information and explain what action you need to take."
    )

    uploaded_notice = st.file_uploader(
        "Upload Notice (Image: JPG/PNG)",
        type=["jpg", "jpeg", "png"],
        help="For PDF notices, take a screenshot of the main page and upload as PNG/JPG.",
        key="notice_uploader"
    )

    if uploaded_notice:
        col_preview, col_result = st.columns([1, 1])

        with col_preview:
            st.markdown("**Preview:**")
            st.image(uploaded_notice, use_container_width=True)

        with col_result:
            if st.button("🔍 Analyse Notice", key="analyse_notice"):
                image_bytes = uploaded_notice.getvalue()
                # uploaded_notice.type is already a MIME type; this rebuilds
                # it as image/<subtype> (kept for behavioural parity).
                file_type = f"image/{uploaded_notice.type.split('/')[-1]}"

                result = parse_tax_notice(image_bytes, file_type)
                st.session_state["notice_result"] = result

    # Display results if available
    if st.session_state.get("notice_result"):
        r = st.session_state["notice_result"]
        st.markdown("---")

        if "error" in r:
            st.error(r["error"])
        elif "raw_response" in r:
            st.info("Raw extraction (structured parsing unavailable):")
            st.write(r["raw_response"])
        else:
            # Severity color mapping; unknown severities fall back to warn.
            severity_class = {
                "HIGH": "status-err",
                "MEDIUM": "status-warn",
                "LOW": "status-ok"
            }.get(r.get("severity", "MEDIUM"), "status-warn")

            # SECURITY FIX: fields extracted by the vision model are
            # interpolated into raw HTML (unsafe_allow_html=True). Escape
            # each one so a crafted notice image cannot inject markup.
            def _esc(key: str, default: str) -> str:
                """Escape one extracted field for safe HTML interpolation."""
                return html.escape(str(r.get(key, default)))

            st.markdown(f"""
            <div class="notice-card">
                <h4>{_esc('notice_type', 'Tax Notice')}</h4>
                <p><b>Assessment Year:</b> {_esc('assessment_year', 'N/A')}</p>
                <p><b>PAN / GSTIN:</b> {_esc('taxpayer_pan', 'N/A')}</p>
                <hr style="margin: 0.5rem 0;">
                <p><b>🔍 Discrepancy Found:</b><br>{_esc('key_discrepancy', 'N/A')}</p>
                <p><b>💰 Amount Involved:</b> {_esc('amount_involved', 'N/A')}</p>
                <p><b>✅ What You Must Do:</b><br>{_esc('required_action', 'N/A')}</p>
                <span class="{severity_class}">
                    {_esc('severity', 'MEDIUM')} PRIORITY
                </span>

                <span class="deadline">DEADLINE: {_esc('deadline', 'Check notice')}</span>
            </div>
            """, unsafe_allow_html=True)

            # Offer to explain further via chat (plain markdown — no raw HTML here)
            st.info(
                "💡 Switch to the **Tax Advisory Chat** tab and ask "
                f"\"Explain {r.get('notice_type', 'this notice type')} and my options\" "
                "for detailed statutory guidance."
            )
|
| 952 |
+
|
| 953 |
+
|
| 954 |
+
# ══════════════════════════════════════════
|
| 955 |
+
# TAB 3: AUDIT RISK SCANNER (Placeholder)
|
| 956 |
+
# ══════════════════════════════════════════
|
| 957 |
+
with tab_audit:
    st.markdown("### 📊 Audit Risk Scanner")
    st.markdown(
        "Enter your key financial figures. The ML model (Random Forest) "
        "will estimate your audit risk score based on anomaly patterns."
    )
    st.info(
        "🔧 **Engine Status:** ML model placeholder. "
        "In the full build, a Scikit-Learn Random Forest model trained on "
        "historical audit trigger patterns will power this scanner.",
        icon="ℹ️"
    )

    # All figures are in ₹ Lakhs; min_value=0.0 means negatives are impossible
    # but zeros are not.
    col1, col2 = st.columns(2)
    with col1:
        turnover = st.number_input("Annual Turnover (₹ Lakhs)", min_value=0.0, value=50.0, step=1.0)
        gross_profit = st.number_input("Gross Profit (₹ Lakhs)", min_value=0.0, value=8.0, step=0.5)
        tds_claimed = st.number_input("TDS Claimed (₹ Lakhs)", min_value=0.0, value=2.0, step=0.1)
    with col2:
        tax_paid = st.number_input("Total Tax Paid (₹ Lakhs)", min_value=0.0, value=3.5, step=0.1)
        deductions_80c = st.number_input("80C/80D Deductions (₹ Lakhs)", min_value=0.0, value=1.5, step=0.1)
        cash_deposits = st.number_input("Cash Deposits in FY (₹ Lakhs)", min_value=0.0, value=5.0, step=0.5)

    if st.button("⚡ Run Audit Risk Scan", key="audit_scan"):
        # ── Placeholder Rule-Based Score (replace with sklearn RF model) ──
        risk_score = 0
        flags = []

        gp_ratio = (gross_profit / turnover * 100) if turnover > 0 else 0
        if gp_ratio < 8:
            risk_score += 25
            flags.append(f"Low gross profit ratio ({gp_ratio:.1f}%) — industry avg ~10-15%")
        if deductions_80c > 1.5:
            risk_score += 20
            flags.append(f"80C deductions (₹{deductions_80c}L) exceed ₹1.5L limit")
        # BUG FIX: when turnover == 0, any positive deposit satisfied
        # `cash_deposits > turnover * 0.3` and the ratio f-string below then
        # divided by zero. The ratio is undefined without turnover, so guard
        # the whole branch on turnover > 0.
        if turnover > 0 and cash_deposits > turnover * 0.3:
            risk_score += 30
            flags.append(f"High cash deposit ratio ({cash_deposits/turnover*100:.0f}% of turnover)")
        if tds_claimed > tax_paid * 0.8:
            risk_score += 15
            flags.append("High TDS-to-tax-paid ratio — possible TDS mismatch")

        # Cap the heuristic score at 100.
        risk_score = min(risk_score, 100)

        # Display result — colour band by score.
        if risk_score >= 60:
            color, label = "#c0392b", "HIGH RISK"
        elif risk_score >= 30:
            color, label = "#e8851a", "MEDIUM RISK"
        else:
            color, label = "#1a7a6e", "LOW RISK"

        st.markdown(f"""
        <div style="background:{color}15; border: 2px solid {color}; border-radius:8px;
                    padding:1.5rem; margin:1rem 0; text-align:center;">
            <div style="font-family:'DM Serif Display',serif; font-size:2rem;
                        color:{color}; font-weight:bold;">{risk_score} / 100</div>
            <div style="color:{color}; font-weight:700; font-family:'JetBrains Mono',monospace;
                        font-size:0.9rem; letter-spacing:0.1em;">{label}</div>
        </div>
        """, unsafe_allow_html=True)

        if flags:
            st.markdown("**⚠️ Risk Flags Detected:**")
            for flag in flags:
                st.markdown(f"- {flag}")
        else:
            st.success("✅ No significant risk flags detected in your financial profile.")

        st.caption(
            "Note: This score is based on heuristic rules for the demo. "
            "The production version uses a Random Forest model trained on audit patterns."
        )
|
circular-11-2025.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:45a854508983ed56b3cffddb0248775f4c486c1b520feee67a7f8132c56c2fc1
|
| 3 |
+
size 288282
|
circular-15-2025.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f95677aefb3701ec5654e679cda5a06d096eab9167461a7546a19e5509eaf3f9
|
| 3 |
+
size 388985
|
circular-no-14-2025.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a6427527295bc3b07dc75d4d64c5da89e45277d31a94c2c5faa0ab1c919e4fd4
|
| 3 |
+
size 572468
|
income-tax-act-1961-as-amended-by-finance-act-2025.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:626134660ea5ca236d967eb5ed7d7b989d48f3240f0c4dfaa57dc09846288ff5
|
| 3 |
+
size 5371727
|
requirements.txt
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# TaxBot AI — Hugging Face Space Requirements
|
| 2 |
+
# Python 3.10+ recommended
|
| 3 |
+
|
| 4 |
+
# ── UI Framework ──────────────────────────
|
| 5 |
+
streamlit>=1.35.0
|
| 6 |
+
|
| 7 |
+
# ── LangChain Orchestration ───────────────
|
| 8 |
+
langchain>=0.2.0
|
| 9 |
+
langchain-community>=0.2.0
|
| 10 |
+
langchain-anthropic>=0.1.15 # Claude via LangChain
|
| 11 |
+
langchain-openai>=0.1.8 # OpenAI embeddings + GPT-4o Vision
|
| 12 |
+
|
| 13 |
+
# ── LLM Providers ────────────────────────
|
| 14 |
+
anthropic>=0.28.0 # Claude API (direct client)
|
| 15 |
+
openai>=1.35.0 # GPT-4o Vision + Embeddings
|
| 16 |
+
|
| 17 |
+
# ── Vector Database ───────────────────────
|
| 18 |
+
chromadb>=0.5.0
|
| 19 |
+
|
| 20 |
+
# ── Document Processing ───────────────────
|
| 21 |
+
pypdf>=4.0.0 # PDF loading for LangChain
|
| 22 |
+
|
| 23 |
+
# ── ML / Audit Risk Engine ────────────────
|
| 24 |
+
scikit-learn>=1.4.0 # Random Forest audit risk model
|
| 25 |
+
pandas>=2.0.0
|
| 26 |
+
numpy>=1.26.0
|
| 27 |
+
|
| 28 |
+
# ── Utilities ─────────────────────────────
|
| 29 |
+
python-dotenv>=1.0.0 # .env support for local dev
|
waiver-of-interest-circular-no-13-2025.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1aa1afd3a54010aacffd7ed09ca1bd8b33906db98da8e4ff20cce2e20ad485ad
|
| 3 |
+
size 410954
|