| """ |
| ╔══════════════════════════════════════════════════════════════════════╗ |
| ║ TaxBot AI — app.py ║ |
| ║ Multimodal Financial Co-pilot for Indian MSMEs & Taxpayers ║ |
| ║ Built for Hugging Face Spaces · Powered by Streamlit + LangChain ║ |
| ╚══════════════════════════════════════════════════════════════════════╝ |
| |
| ARCHITECTURE OVERVIEW (Triple-Engine Hybrid RAG): |
| Engine 1 — Knowledge Base (RAG): PDF → ChromaDB → Retrieval |
| Engine 2 — Generative Reasoning: Retrieved context → Claude → Answer |
| Engine 3 — Notice Interpreter: Image upload (JPG/PNG) → GPT-4o Vision → Summary |
| |
| DEPLOYMENT: |
| 1. Upload this file + requirements.txt to a Hugging Face Space (Streamlit SDK). |
| 2. Set secrets: ANTHROPIC_API_KEY, OPENAI_API_KEY in HF Space Settings. |
| 3. ChromaDB runs in-memory (no external DB needed for the pilot). |
| """ |
|
|
| |
| |
| |
import base64
import html
import io
import logging
import os
import tempfile

import streamlit as st
|
|
| |
| from langchain_community.document_loaders import PyPDFLoader |
| from langchain_text_splitters import RecursiveCharacterTextSplitter |
| from langchain_community.vectorstores import Chroma |
| from langchain_openai import OpenAIEmbeddings |
| from langchain_anthropic import ChatAnthropic |
| from langchain_core.prompts import PromptTemplate |
|
|
| |
| try: |
| from langchain.chains import RetrievalQA |
| except ImportError: |
| from langchain_community.chains import RetrievalQA |
|
|
| |
| from openai import OpenAI |
|
|
| |
# Configure the browser tab (title and icon), use the wide layout and start
# with the sidebar expanded.
st.set_page_config(
    page_title="TaxBot AI · Indian Tax Co-pilot",
    page_icon="⚖️",
    layout="wide",
    initial_sidebar_state="expanded",
)


# Inject the app-wide stylesheet as raw HTML. The custom classes defined here
# (.taxbot-header, .taxbot-subtitle, .chat-user, .chat-bot, .source-tag,
# .notice-card, .deadline, .status-ok / .status-warn / .status-err) are the
# ones used by the HTML fragments rendered further down in this file; the
# remaining rules restyle Streamlit's own sidebar, inputs, buttons and tabs.
st.markdown("""
<style>
@import url('https://fonts.googleapis.com/css2?family=DM+Serif+Display:ital@0;1&family=JetBrains+Mono:wght@400;600&family=DM+Sans:wght@400;500;600&display=swap');

/* ── Root Variables ── */
:root {
--navy: #0d1b2a;
--saffron: #e8851a;
--cream: #f5f0e8;
--teal: #1a7a6e;
--red: #c0392b;
--text: #1a1a2e;
--muted: #6b7280;
--border: #d4c9b0;
--card-bg: #fdfaf5;
}

/* ── Global Reset ── */
html, body, [class*="css"] {
font-family: 'DM Sans', sans-serif;
background-color: var(--cream) !important;
color: var(--text);
}

/* ── Sidebar ── */
[data-testid="stSidebar"] {
background: var(--navy) !important;
border-right: 3px solid var(--saffron);
}
[data-testid="stSidebar"] * { color: var(--cream) !important; }
[data-testid="stSidebar"] h1,
[data-testid="stSidebar"] h2,
[data-testid="stSidebar"] h3 { color: var(--saffron) !important; }
[data-testid="stSidebar"] .stButton > button {
background: var(--saffron) !important;
color: var(--navy) !important;
font-weight: 700 !important;
border: none !important;
border-radius: 4px !important;
width: 100% !important;
font-family: 'JetBrains Mono', monospace !important;
letter-spacing: 0.05em;
}
[data-testid="stSidebar"] .stButton > button:hover {
background: #f0972a !important;
transform: translateY(-1px);
box-shadow: 0 4px 12px rgba(232,133,26,0.4) !important;
}

/* ── Main Header ── */
.taxbot-header {
display: flex;
align-items: center;
gap: 1rem;
padding: 1.5rem 0 0.5rem;
border-bottom: 2px solid var(--saffron);
margin-bottom: 1.5rem;
}
.taxbot-header h1 {
font-family: 'DM Serif Display', serif;
font-size: 2.4rem;
color: var(--navy);
margin: 0;
letter-spacing: -0.03em;
}
.taxbot-header .badge {
background: var(--saffron);
color: var(--navy);
font-family: 'JetBrains Mono', monospace;
font-size: 0.65rem;
font-weight: 700;
padding: 3px 10px;
border-radius: 2px;
letter-spacing: 0.12em;
text-transform: uppercase;
align-self: flex-start;
margin-top: 0.6rem;
}
.taxbot-subtitle {
color: var(--muted);
font-size: 0.95rem;
margin-bottom: 1.5rem;
}

/* ── Chat Messages ── */
.chat-user {
background: var(--navy);
color: var(--cream);
border-radius: 12px 12px 2px 12px;
padding: 1rem 1.25rem;
margin: 0.75rem 0 0.75rem 3rem;
font-size: 0.95rem;
line-height: 1.6;
box-shadow: 0 2px 8px rgba(13,27,42,0.15);
}
.chat-bot {
background: var(--card-bg);
border: 1px solid var(--border);
border-left: 4px solid var(--teal);
border-radius: 2px 12px 12px 12px;
padding: 1rem 1.25rem;
margin: 0.75rem 3rem 0.75rem 0;
font-size: 0.95rem;
line-height: 1.7;
box-shadow: 0 2px 8px rgba(0,0,0,0.06);
}
.chat-bot .source-tag {
font-family: 'JetBrains Mono', monospace;
font-size: 0.7rem;
color: var(--teal);
background: rgba(26,122,110,0.1);
padding: 2px 8px;
border-radius: 3px;
display: inline-block;
margin-top: 0.75rem;
margin-right: 0.4rem;
}

/* ── Notice Summary Card ── */
.notice-card {
background: #fff8f0;
border: 1.5px solid var(--saffron);
border-radius: 8px;
padding: 1.25rem;
margin: 1rem 0;
}
.notice-card h4 {
font-family: 'DM Serif Display', serif;
color: var(--navy);
margin: 0 0 0.5rem;
font-size: 1.1rem;
}
.notice-card .deadline {
background: var(--red);
color: white;
font-family: 'JetBrains Mono', monospace;
font-size: 0.75rem;
font-weight: 600;
padding: 3px 10px;
border-radius: 3px;
display: inline-block;
margin-top: 0.5rem;
}

/* ── Status Pills ── */
.status-ok { background:#d4edda; color:#1a5e31; padding:3px 10px; border-radius:12px; font-size:0.8rem; font-weight:600; }
.status-warn { background:#fff3cd; color:#856404; padding:3px 10px; border-radius:12px; font-size:0.8rem; font-weight:600; }
.status-err { background:#f8d7da; color:#721c24; padding:3px 10px; border-radius:12px; font-size:0.8rem; font-weight:600; }

/* ── Input Area ── */
.stTextInput > div > div > input,
.stTextArea > div > div > textarea {
border: 1.5px solid var(--border) !important;
border-radius: 6px !important;
font-family: 'DM Sans', sans-serif !important;
background: white !important;
}
.stTextInput > div > div > input:focus,
.stTextArea > div > div > textarea:focus {
border-color: var(--teal) !important;
box-shadow: 0 0 0 3px rgba(26,122,110,0.15) !important;
}
.stButton > button {
background: var(--teal) !important;
color: white !important;
border: none !important;
border-radius: 6px !important;
font-weight: 600 !important;
padding: 0.5rem 1.5rem !important;
}
.stButton > button:hover {
background: #155f55 !important;
transform: translateY(-1px);
box-shadow: 0 4px 12px rgba(26,122,110,0.3) !important;
}

/* ── Tabs ── */
.stTabs [data-baseweb="tab-list"] { border-bottom: 2px solid var(--border); }
.stTabs [data-baseweb="tab"] {
font-family: 'JetBrains Mono', monospace;
font-size: 0.8rem;
letter-spacing: 0.08em;
color: var(--muted) !important;
}
.stTabs [aria-selected="true"] {
color: var(--navy) !important;
border-bottom: 2px solid var(--saffron) !important;
}

/* ── Divider ── */
hr { border-color: var(--border) !important; }
</style>
""", unsafe_allow_html=True)
|
|
|
|
| |
| |
| |
| |
| |
|
|
@st.cache_resource
def get_llm():
    """
    Build the LangChain chat model used for answer generation.

    Reads ANTHROPIC_API_KEY from the environment. If the key is missing a
    Streamlit warning is shown and None is returned; otherwise a
    ChatAnthropic client for the "claude-sonnet-4-5" model is returned
    (temperature 0.1, at most 1500 output tokens).
    """
    anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "")
    if anthropic_key:
        return ChatAnthropic(
            model="claude-sonnet-4-5",
            api_key=anthropic_key,
            temperature=0.1,
            max_tokens=1500,
        )

    # No key: tell the user and let callers handle the missing model.
    st.warning("⚠️ ANTHROPIC_API_KEY not set. Reasoning engine offline.", icon="⚠️")
    return None
|
|
@st.cache_resource
def get_openai_client():
    """
    Build the OpenAI client that the notice interpreter uses for vision calls.

    Returns None when OPENAI_API_KEY is absent or empty in the environment.
    """
    openai_key = os.environ.get("OPENAI_API_KEY", "")
    return OpenAI(api_key=openai_key) if openai_key else None
|
|
@st.cache_resource
def get_embeddings():
    """
    Build the OpenAI embedding model ("text-embedding-3-small") that turns
    text chunks into vectors for ChromaDB.

    Returns None when OPENAI_API_KEY is absent or empty in the environment.
    """
    openai_key = os.environ.get("OPENAI_API_KEY", "")
    if openai_key:
        return OpenAIEmbeddings(model="text-embedding-3-small", api_key=openai_key)
    return None
|
|
|
|
| |
| |
| |
| |
|
|
def init_session_state():
    """Give every expected st.session_state key a default, leaving existing values alone."""
    initial_values = {
        "chat_history": [],
        "vectorstore": None,
        "kb_doc_count": 0,
        "kb_file_names": [],
        "notice_result": None,
    }
    # Only fill in the keys this session does not have yet.
    missing = {
        name: default
        for name, default in initial_values.items()
        if name not in st.session_state
    }
    for name, default in missing.items():
        st.session_state[name] = default


init_session_state()
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
|
|
@st.cache_resource(show_spinner=False)
def autoload_knowledge_base():
    """
    Load every PDF in the local ``docs/`` folder into a ChromaDB collection.

    Cached with st.cache_resource, so it runs once per app instance rather
    than once per user session.

    Returns:
        (vectorstore, chunk_count, file_names). ``file_names`` lists only the
        PDFs that were actually read; files that fail to parse are logged and
        skipped. Returns (None, 0, []) when no embeddings are available, the
        folder is missing, or nothing usable was found.
    """
    docs_folder = "docs"
    embeddings = get_embeddings()
    if embeddings is None:
        return None, 0, []

    # isdir rather than exists: a regular file named "docs" must not reach listdir.
    if not os.path.isdir(docs_folder):
        return None, 0, []

    # Sorted so the indexing order and the displayed source list are stable.
    pdf_files = sorted(
        f for f in os.listdir(docs_folder) if f.lower().endswith(".pdf")
    )
    if not pdf_files:
        return None, 0, []

    logger = logging.getLogger(__name__)
    all_pages = []
    loaded_files = []
    for pdf_name in pdf_files:
        try:
            pages = PyPDFLoader(os.path.join(docs_folder, pdf_name)).load()
        except Exception:
            # Best-effort preload: one unreadable PDF must not block the rest,
            # but the failure is recorded instead of vanishing silently.
            logger.exception("Skipping PDF that could not be loaded: %s", pdf_name)
            continue
        if not pages:
            continue
        # Store the bare file name so citations show it instead of a path.
        for page in pages:
            page.metadata["source"] = pdf_name
        all_pages.extend(pages)
        loaded_files.append(pdf_name)

    if not all_pages:
        return None, 0, []

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=150,
        separators=["\n\n", "\n", ".", " "],
    )
    chunks = splitter.split_documents(all_pages)
    if not chunks:
        # e.g. PDFs with no extractable text; nothing to embed.
        return None, 0, []

    vectorstore = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        collection_name="taxbot_preloaded_kb",
    )
    return vectorstore, len(chunks), loaded_files
|
|
|
|
| |
# Sessions that have no vector store yet adopt the cached, pre-loaded one
# (when it exists) and are flagged as running on the pre-loaded KB.
if st.session_state["vectorstore"] is None:
    _vs, _count, _files = autoload_knowledge_base()
    if _vs is not None:
        for _key, _value in (
            ("vectorstore", _vs),
            ("kb_doc_count", _count),
            ("kb_file_names", _files),
            ("kb_preloaded", True),
        ):
            st.session_state[_key] = _value
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
| |
|
|
def build_knowledge_base(uploaded_files: list) -> tuple[Chroma | None, int]:
    """
    Ingest uploaded PDF files into a ChromaDB vector store.

    Each upload is written to a temporary file (PyPDFLoader is given a path),
    parsed page by page, split into overlapping chunks and embedded. A PDF
    that cannot be parsed is reported with a warning and skipped; its
    temporary file is always removed.

    Args:
        uploaded_files: List of Streamlit UploadedFile objects.

    Returns:
        (vectorstore, chunk_count). (None, 0) when embeddings are unavailable
        or no text could be extracted from any upload.
    """
    embeddings = get_embeddings()
    if embeddings is None:
        st.error("OpenAI API key required for building the Knowledge Base.")
        return None, 0

    all_pages = []

    with st.spinner("📄 Reading and parsing PDFs..."):
        for uploaded_file in uploaded_files:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
                tmp.write(uploaded_file.getvalue())
                tmp_path = tmp.name

            try:
                raw_pages = PyPDFLoader(tmp_path).load()
            except Exception as e:
                st.warning(f"Could not load {uploaded_file.name}: {e}")
                continue
            finally:
                # Remove the temp copy whether or not parsing succeeded.
                os.unlink(tmp_path)

            # Cite the original upload name, not the random temp path.
            for page in raw_pages:
                page.metadata["source"] = uploaded_file.name
            all_pages.extend(raw_pages)

    if not all_pages:
        st.error("No readable pages were found in the uploaded PDFs.")
        return None, 0

    with st.spinner("✂️ Chunking documents into retrievable segments..."):
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=150,
            separators=["\n\n", "\n", ".", " "],
        )
        chunks = splitter.split_documents(all_pages)

    if not chunks:
        # e.g. scanned PDFs without a text layer; there is nothing to embed.
        st.error("The uploaded PDFs contain no extractable text.")
        return None, 0

    with st.spinner(f"🧠 Embedding {len(chunks)} chunks into vector database..."):
        vectorstore = Chroma.from_documents(
            documents=chunks,
            embedding=embeddings,
            collection_name="taxbot_knowledge_base",
        )

    return vectorstore, len(chunks)
|
|
|
|
def build_knowledge_base_from_folder(folder_path: str = "docs") -> tuple:
    """
    Index every PDF found directly inside ``folder_path`` into ChromaDB.

    PDFs that fail to load are reported with st.warning and skipped.

    Args:
        folder_path: Folder that holds the PDFs to ingest (default "docs").

    Returns:
        (vectorstore, chunk_count, file_names); (None, 0, []) when embeddings
        are unavailable, the folder does not exist, it holds no PDFs, or no
        page could be loaded.
    """
    embeddings = get_embeddings()
    if embeddings is None:
        return None, 0, []

    if not os.path.exists(folder_path):
        return None, 0, []

    pdf_files = []
    for entry in os.listdir(folder_path):
        if entry.lower().endswith(".pdf"):
            pdf_files.append(entry)
    if not pdf_files:
        return None, 0, []

    all_pages = []
    for pdf_name in pdf_files:
        try:
            pages = PyPDFLoader(os.path.join(folder_path, pdf_name)).load()
            # Tag each page with the file name so it can be cited later.
            for page in pages:
                page.metadata["source"] = pdf_name
            all_pages.extend(pages)
        except Exception as e:
            st.warning(f"Could not load {pdf_name}: {e}")

    if not all_pages:
        return None, 0, []

    chunks = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=150,
        separators=["\n\n", "\n", ".", " "],
    ).split_documents(all_pages)

    vectorstore = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        collection_name="taxbot_knowledge_base",
    )
    return vectorstore, len(chunks), pdf_files
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
# Prompt for the RAG answer step. {context} and {question} are the two
# template variables declared on TAXBOT_PROMPT below; the prompt is handed to
# the retrieval chain in get_tax_answer.
TAXBOT_PROMPT_TEMPLATE = """
You are TaxBot AI, an expert Indian tax compliance assistant for MSMEs and individual taxpayers.
Your answers must be:
1. GROUNDED: Only use information from the provided context (retrieved statutory excerpts).
2. PLAIN-LANGUAGE: Explain complex legal provisions in simple business terms.
3. STRUCTURED: Use bullet points and section references where helpful.
4. HONEST: If the context does not contain enough information, say so clearly.
Never fabricate section numbers or circular references.
5. ACTIONABLE: End with a clear "What you should do" recommendation.

RETRIEVED STATUTORY CONTEXT:
──────────────────────────────
{context}
──────────────────────────────

USER QUESTION: {question}

TAXBOT AI RESPONSE:
"""


# LangChain wrapper around the template text above.
TAXBOT_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=TAXBOT_PROMPT_TEMPLATE
)
|
|
|
|
def get_tax_answer(question: str, vectorstore: Chroma, k: int = 4) -> dict:
    """
    Run the RAG pipeline: retrieve relevant chunks, then ask the LLM.

    Args:
        question: The user's tax query string.
        vectorstore: The populated ChromaDB instance.
        k: Number of chunks to retrieve by similarity. Defaults to 4, the
            value that used to be hard-coded, so existing callers are
            unaffected; a caller may pass the sidebar's "Chunks to retrieve"
            setting here.

    Returns:
        dict with keys "answer" (str) and "sources" (list of distinct source
        file names, in the order the retriever returned them).
    """
    llm = get_llm()
    if llm is None:
        return {"answer": "⚠️ LLM not configured. Please set ANTHROPIC_API_KEY.", "sources": []}

    # "stuff" puts all retrieved chunks into one prompt via TAXBOT_PROMPT.
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vectorstore.as_retriever(
            search_type="similarity",
            search_kwargs={"k": k},
        ),
        chain_type_kwargs={"prompt": TAXBOT_PROMPT},
        return_source_documents=True,
    )

    result = qa_chain.invoke({"query": question})

    # dict.fromkeys de-duplicates while keeping retrieval order; a set would
    # return the names in arbitrary order.
    sources = list(dict.fromkeys(
        doc.metadata.get("source", "Unknown")
        for doc in result.get("source_documents", [])
    ))

    return {
        "answer": result.get("result", "No answer generated."),
        "sources": sources,
    }
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
|
|
# System prompt for the vision call in parse_tax_notice. It asks the model for
# a JSON object; the keys listed here are the ones the Notice Interpreter tab
# reads when it renders the summary card.
NOTICE_SYSTEM_PROMPT = """
You are an expert Indian tax notice analyst. When given a tax notice image,
extract and summarize the following in a structured JSON format:

{
"notice_type": "e.g., Section 143(1) Intimation / GST ASMT-10 / etc.",
"assessment_year": "AY 20XX-XX",
"taxpayer_pan": "PAN or GSTIN if visible",
"key_discrepancy": "Plain-language description of what the department found",
"amount_involved": "₹ amount of demand or refund",
"deadline": "Date by which taxpayer must respond or pay",
"required_action": "Specific steps the taxpayer must take",
"severity": "LOW / MEDIUM / HIGH",
"severity_reason": "Brief reason for severity classification"
}

If any field is not visible in the notice, set it to "Not specified".
"""
|
|
|
|
def parse_tax_notice(image_bytes: bytes, file_type: str = "image/jpeg") -> dict | None:
    """
    Send a notice image to GPT-4o Vision and return a structured summary.

    Args:
        image_bytes: Raw bytes of the uploaded notice image.
        file_type: MIME type of the image (image/jpeg, image/png, etc.)

    Returns:
        A dict in one of three shapes, all of which the Notice Interpreter
        tab handles:
          * the parsed notice fields requested by NOTICE_SYSTEM_PROMPT;
          * {"error": ...} when the client is not configured, the request
            fails, or the model returns no text;
          * {"notice_type": "Parsed (raw)", "raw_response": ...} when the
            reply is not a JSON object.
        The annotation still allows None for backward compatibility, but this
        function always returns a dict.
    """
    import json

    client = get_openai_client()
    if client is None:
        return {"error": "OpenAI API key not configured. Vision engine offline."}

    # The image is sent inline as a base64 data URL.
    b64_image = base64.b64encode(image_bytes).decode("utf-8")

    with st.spinner("🔍 Analysing notice with Vision AI..."):
        try:
            response = client.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {
                        "role": "system",
                        "content": NOTICE_SYSTEM_PROMPT
                    },
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:{file_type};base64,{b64_image}",
                                    "detail": "high"
                                }
                            },
                            {
                                "type": "text",
                                "text": "Please analyse this Indian tax notice and extract the structured information as specified."
                            }
                        ]
                    }
                ],
                max_tokens=1000,
                temperature=0.0,
            )
        except Exception as exc:
            # UI boundary: surface network/auth/quota failures as an error
            # card instead of crashing the whole page.
            return {"error": f"Vision request failed: {exc}"}

    # message.content may be None (e.g. a refusal); treat that as empty text.
    raw_text = (response.choices[0].message.content or "").strip()
    if not raw_text:
        return {"error": "Vision engine returned an empty response."}

    # The model often wraps its JSON in a Markdown code fence; strip it.
    raw_text = raw_text.replace("```json", "").replace("```", "").strip()

    try:
        parsed = json.loads(raw_text)
    except json.JSONDecodeError:
        return {"notice_type": "Parsed (raw)", "raw_response": raw_text}

    # Callers use dict methods on the result, so valid JSON that is not an
    # object (list, string, number) is passed back as raw text.
    if not isinstance(parsed, dict):
        return {"notice_type": "Parsed (raw)", "raw_response": raw_text}
    return parsed
|
|
|
|
| |
| |
| |
|
|
# Sidebar: knowledge-base upload/build controls, KB status, and chat settings.
with st.sidebar:
    st.markdown("## ⚖️ TaxBot AI")
    st.markdown("*Your Indian Tax Co-pilot*")
    st.markdown("---")

    # ── Knowledge base upload ──
    st.markdown("### 📚 Knowledge Base")
    st.caption("Upload CBDT Circulars, Finance Acts, GST notifications, or any tax PDF.")

    uploaded_pdfs = st.file_uploader(
        "Upload Tax Documents (PDF)",
        type=["pdf"],
        accept_multiple_files=True,
        help="These will be ingested into ChromaDB to power the RAG engine."
    )

    if st.button("⚡ Build Knowledge Base", key="build_kb"):
        if not uploaded_pdfs:
            st.warning("Please upload at least one PDF first.")
        else:
            vectorstore, chunk_count = build_knowledge_base(uploaded_pdfs)
            if vectorstore:
                st.session_state["vectorstore"] = vectorstore
                st.session_state["kb_doc_count"] = chunk_count
                st.session_state["kb_file_names"] = [f.name for f in uploaded_pdfs]
                # The manual build replaces the auto-loaded store, so the
                # status below must stop reporting a "Pre-loaded" KB.
                st.session_state["kb_preloaded"] = False
                st.success(f"✅ Knowledge Base ready! {chunk_count} chunks indexed.")

    # ── Knowledge base status ──
    if st.session_state["vectorstore"]:
        is_preloaded = st.session_state.get("kb_preloaded", False)
        label = "● Pre-loaded KB Active" if is_preloaded else "● KB Active"
        st.markdown(
            f'<span class="status-ok">{label} — {st.session_state["kb_doc_count"]} chunks</span>',
            unsafe_allow_html=True
        )
        if is_preloaded:
            st.caption("✅ Core tax documents loaded automatically.")
        st.caption("Sources:")
        for fname in st.session_state["kb_file_names"]:
            st.caption(f" • {fname}")
    else:
        st.markdown('<span class="status-warn">● KB Not Built</span>', unsafe_allow_html=True)
        st.caption("No docs/ folder found. Upload PDFs above to build manually.")

    st.markdown("---")

    # ── Settings ──
    st.markdown("### ⚙️ Settings")

    # show_sources controls whether the chat tab renders source tags.
    show_sources = st.toggle("Show source citations", value=True)
    retrieval_k = st.slider("Chunks to retrieve (k)", min_value=2, max_value=8, value=4,
                            help="More chunks = broader context. May increase latency.")

    st.markdown("---")
    st.caption("Built for SIH1285 · Hackathon Demo")
    st.caption("Claude Sonnet 4.5 + GPT-4o Vision")
|
|
|
|
| |
| |
| |
|
|
| |
# Page header: title, beta badge and subtitle, styled by the .taxbot-header
# and .taxbot-subtitle rules injected earlier.
st.markdown("""
<div class="taxbot-header">
<h1>TaxBot AI</h1>
<span class="badge">Beta · SIH1285</span>
</div>
<p class="taxbot-subtitle">
Multimodal Financial Co-pilot · Statutory-accurate guidance for Indian MSMEs & Taxpayers
</p>
""", unsafe_allow_html=True)


# The three top-level tabs; each one is filled in by its own `with` block below.
tab_chat, tab_notice, tab_audit = st.tabs([
    "💬 Tax Advisory Chat",
    "📋 Notice Interpreter",
    "📊 Audit Risk Scanner"
])
|
|
|
|
| |
| |
| |
# Tab 1: chat. Renders the stored history, takes a new question, answers it
# through the RAG pipeline (or the bare LLM when no knowledge base exists) and
# reruns the script so the new messages show up.
with tab_chat:

    chat_container = st.container()
    with chat_container:
        if not st.session_state["chat_history"]:
            # Empty state with example questions.
            st.markdown("""
<div style="text-align:center; padding: 3rem 1rem; color: #9ca3af;">
<div style="font-size: 2.5rem; margin-bottom: 1rem;">⚖️</div>
<div style="font-family: 'DM Serif Display', serif; font-size: 1.2rem;
color: #374151; margin-bottom: 0.5rem;">
Ask me anything about Indian Tax Law
</div>
<div style="font-size: 0.9rem;">
Upload PDFs to the Knowledge Base, then ask queries like:<br>
<em>"What are the due dates under Circular 15/2025?"</em><br>
<em>"Explain the 87A rebate changes under Finance Act 2025."</em><br>
<em>"What is the penalty for late TDS payment?"</em>
</div>
</div>
""", unsafe_allow_html=True)

        for msg in st.session_state["chat_history"]:
            # The messages are rendered with unsafe_allow_html, and their text
            # comes from the user, the LLM and uploaded file names. Escape it
            # so it cannot inject markup into the page.
            safe_content = html.escape(str(msg["content"]), quote=False)
            if msg["role"] == "user":
                st.markdown(
                    f'<div class="chat-user">🧑 {safe_content}</div>',
                    unsafe_allow_html=True
                )
            else:
                sources_html = ""
                if show_sources and msg.get("sources"):
                    sources_html = "".join(
                        f'<span class="source-tag">📎 {html.escape(str(src), quote=False)}</span>'
                        for src in msg["sources"]
                    )

                st.markdown(
                    f'<div class="chat-bot">⚖️ {safe_content}{sources_html}</div>',
                    unsafe_allow_html=True
                )

    # ── Input row ──
    st.markdown("<br>", unsafe_allow_html=True)
    col_input, col_btn = st.columns([5, 1])

    with col_input:
        user_query = st.text_input(
            "Your tax question",
            placeholder="e.g. What is the deadline for filing ITR for AY 2025-26?",
            label_visibility="collapsed",
            key="chat_input"
        )
    with col_btn:
        send_clicked = st.button("Send →", key="send_btn")

    # ── Handle a submitted question ──
    if send_clicked and user_query.strip():

        st.session_state["chat_history"].append({
            "role": "user",
            "content": user_query
        })

        if st.session_state["vectorstore"] is None:
            # No knowledge base: fall back to the model's own knowledge.
            llm = get_llm()
            if llm:
                with st.spinner("🤔 Thinking (no Knowledge Base — using Claude's base knowledge)..."):
                    fallback_prompt = f"""
You are TaxBot AI, an expert Indian tax assistant. Answer the following question
based on your knowledge of Indian Income Tax Act 1961, GST laws, and CBDT circulars.
Be specific, structured, and cite relevant sections. End with actionable advice.

Question: {user_query}
"""
                    response = llm.invoke(fallback_prompt)
                    answer = response.content
                fallback_sources = ["Claude base knowledge (no RAG)"]
            else:
                answer = "⚠️ Both the Knowledge Base and LLM are unavailable. Please check your API keys."
                # Nothing produced this message, so do not attribute it to Claude.
                fallback_sources = []

            st.session_state["chat_history"].append({
                "role": "bot",
                "content": answer,
                "sources": fallback_sources
            })

        else:
            # Knowledge base available: full retrieve-then-answer pipeline.
            with st.spinner("🔎 Searching knowledge base + reasoning..."):
                result = get_tax_answer(user_query, st.session_state["vectorstore"])

            st.session_state["chat_history"].append({
                "role": "bot",
                "content": result["answer"],
                "sources": result["sources"]
            })

        # Rerun so the history loop above draws the new messages.
        st.rerun()

    # ── Clear history ──
    if st.session_state["chat_history"]:
        if st.button("🗑 Clear Chat", key="clear_chat"):
            st.session_state["chat_history"] = []
            st.rerun()
|
|
|
|
| |
| |
| |
def _notice_field(record: dict, key: str, default: str = "N/A") -> str:
    """Return record[key] as HTML-escaped text, or *default* when missing/empty."""
    value = record.get(key)
    if value is None or value == "":
        value = default
    return html.escape(str(value), quote=False)


# Tab 2: notice interpreter. Upload an image of a tax notice, send it to the
# vision engine and render the extracted fields as a summary card.
with tab_notice:
    st.markdown("### 📋 Tax Notice Interpreter")
    st.markdown(
        "Upload a scanned or digital tax notice (image or PDF screenshot). "
        "The Vision AI engine will extract key information and explain what action you need to take."
    )

    uploaded_notice = st.file_uploader(
        "Upload Notice (Image: JPG/PNG)",
        type=["jpg", "jpeg", "png"],
        help="For PDF notices, take a screenshot of the main page and upload as PNG/JPG.",
        key="notice_uploader"
    )

    if uploaded_notice:
        col_preview, col_result = st.columns([1, 1])

        with col_preview:
            st.markdown("**Preview:**")
            st.image(uploaded_notice, use_container_width=True)

        with col_result:
            if st.button("🔍 Analyse Notice", key="analyse_notice"):
                image_bytes = uploaded_notice.getvalue()
                # UploadedFile.type is used as the MIME type; fall back to
                # JPEG if the browser did not report one.
                file_type = uploaded_notice.type or "image/jpeg"

                result = parse_tax_notice(image_bytes, file_type)
                st.session_state["notice_result"] = result

            # The result lives in session state so it survives reruns.
            if st.session_state.get("notice_result"):
                r = st.session_state["notice_result"]
                st.markdown("---")

                if "error" in r:
                    st.error(r["error"])
                elif "raw_response" in r:
                    st.info("Raw extraction (structured parsing unavailable):")
                    st.write(r["raw_response"])
                else:
                    # Normalise case so "high"/"High" still map to a pill.
                    severity = str(r.get("severity") or "MEDIUM").strip().upper()
                    severity_class = {
                        "HIGH": "status-err",
                        "MEDIUM": "status-warn",
                        "LOW": "status-ok"
                    }.get(severity, "status-warn")

                    # Every value below was extracted by the model from an
                    # uploaded image, so it is escaped before being placed in
                    # markup rendered with unsafe_allow_html.
                    st.markdown(f"""
<div class="notice-card">
<h4>{_notice_field(r, 'notice_type', 'Tax Notice')}</h4>
<p><b>Assessment Year:</b> {_notice_field(r, 'assessment_year')}</p>
<p><b>PAN / GSTIN:</b> {_notice_field(r, 'taxpayer_pan')}</p>
<hr style="margin: 0.5rem 0;">
<p><b>🔍 Discrepancy Found:</b><br>{_notice_field(r, 'key_discrepancy')}</p>
<p><b>💰 Amount Involved:</b> {_notice_field(r, 'amount_involved')}</p>
<p><b>✅ What You Must Do:</b><br>{_notice_field(r, 'required_action')}</p>
<span class="{severity_class}">
{html.escape(severity, quote=False)} PRIORITY
</span>
<span class="deadline">DEADLINE: {_notice_field(r, 'deadline', 'Check notice')}</span>
</div>
""", unsafe_allow_html=True)

                    # Point the user at the chat tab for follow-up guidance.
                    st.info(
                        "💡 Switch to the **Tax Advisory Chat** tab and ask "
                        f"\"Explain {r.get('notice_type', 'this notice type')} and my options\" "
                        "for detailed statutory guidance."
                    )
|
|
|
|
| |
| |
| |
# Tab 3: audit risk scanner. Collects six financial figures and scores them
# with fixed heuristic rules (a placeholder for the planned ML model).
with tab_audit:
    st.markdown("### 📊 Audit Risk Scanner")
    st.markdown(
        "Enter your key financial figures. The ML model (Random Forest) "
        "will estimate your audit risk score based on anomaly patterns."
    )
    st.info(
        "🔧 **Engine Status:** ML model placeholder. "
        "In the full build, a Scikit-Learn Random Forest model trained on "
        "historical audit trigger patterns will power this scanner.",
        icon="ℹ️"
    )

    col1, col2 = st.columns(2)
    with col1:
        turnover = st.number_input("Annual Turnover (₹ Lakhs)", min_value=0.0, value=50.0, step=1.0)
        gross_profit = st.number_input("Gross Profit (₹ Lakhs)", min_value=0.0, value=8.0, step=0.5)
        tds_claimed = st.number_input("TDS Claimed (₹ Lakhs)", min_value=0.0, value=2.0, step=0.1)
    with col2:
        tax_paid = st.number_input("Total Tax Paid (₹ Lakhs)", min_value=0.0, value=3.5, step=0.1)
        deductions_80c = st.number_input("80C/80D Deductions (₹ Lakhs)", min_value=0.0, value=1.5, step=0.1)
        cash_deposits = st.number_input("Cash Deposits in FY (₹ Lakhs)", min_value=0.0, value=5.0, step=0.5)

    if st.button("⚡ Run Audit Risk Scan", key="audit_scan"):

        risk_score = 0
        flags = []

        # Rule 1: gross profit below 8% of turnover (treated as 0% when
        # turnover is zero).
        gp_ratio = (gross_profit / turnover * 100) if turnover > 0 else 0
        if gp_ratio < 8:
            risk_score += 25
            flags.append(f"Low gross profit ratio ({gp_ratio:.1f}%) — industry avg ~10-15%")

        # Rule 2: deductions above the 1.5 lakh threshold.
        if deductions_80c > 1.5:
            risk_score += 20
            flags.append(f"80C deductions (₹{deductions_80c}L) exceed ₹1.5L limit")

        # Rule 3: cash deposits above 30% of turnover. Turnover may be 0 (the
        # input allows it), so the percentage is only computed when it is
        # defined; otherwise the flag is worded without a ratio.
        if cash_deposits > turnover * 0.3:
            risk_score += 30
            if turnover > 0:
                flags.append(f"High cash deposit ratio ({cash_deposits/turnover*100:.0f}% of turnover)")
            else:
                flags.append(f"Cash deposits (₹{cash_deposits}L) reported with zero turnover")

        # Rule 4: TDS claimed above 80% of total tax paid.
        if tds_claimed > tax_paid * 0.8:
            risk_score += 15
            flags.append("High TDS-to-tax-paid ratio — possible TDS mismatch")

        risk_score = min(risk_score, 100)

        # Map the score to a colour and label for the result card.
        if risk_score >= 60:
            color, label = "#c0392b", "HIGH RISK"
        elif risk_score >= 30:
            color, label = "#e8851a", "MEDIUM RISK"
        else:
            color, label = "#1a7a6e", "LOW RISK"

        st.markdown(f"""
<div style="background:{color}15; border: 2px solid {color}; border-radius:8px;
padding:1.5rem; margin:1rem 0; text-align:center;">
<div style="font-family:'DM Serif Display',serif; font-size:2rem;
color:{color}; font-weight:bold;">{risk_score} / 100</div>
<div style="color:{color}; font-weight:700; font-family:'JetBrains Mono',monospace;
font-size:0.9rem; letter-spacing:0.1em;">{label}</div>
</div>
""", unsafe_allow_html=True)

        if flags:
            st.markdown("**⚠️ Risk Flags Detected:**")
            for flag in flags:
                st.markdown(f"- {flag}")
        else:
            st.success("✅ No significant risk flags detected in your financial profile.")

        st.caption(
            "Note: This score is based on heuristic rules for the demo. "
            "The production version uses a Random Forest model trained on audit patterns."
        )
|
|