fragger246
/

taxagent

Model card Files Files and versions

xet

Community

fragger246 committed on Mar 4, 2025

Commit

4a73579

verified ·

1 Parent(s): d70a72c

Upload taxagent.py

Browse files

Files changed (1) hide show

taxagent.py +170 -0

taxagent.py ADDED Viewed

	@@ -0,0 +1,170 @@

+import streamlit as st
+import fitz  # PyMuPDF for PDF extraction
+from langchain_community.llms import Ollama
+from langchain.chains import LLMChain
+from langchain.prompts import PromptTemplate
+from langchain.memory import ConversationBufferMemory
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.vectorstores import FAISS
+from langchain.embeddings import OllamaEmbeddings
+import hashlib
+import numpy as np
# ========================== SESSION STATE INITIALIZATION ========================== #
# Each entry maps a session key to a zero-argument factory for its initial
# value. A key is only seeded when it is not already present in
# st.session_state, so existing values are never overwritten and the factory
# (e.g. a new ConversationBufferMemory) is only called when actually needed.
_SESSION_DEFAULTS = {
    "memory": ConversationBufferMemory,   # conversation memory handed to the LLM chain
    "chat_history": list,                 # list of {"query", "response"} dicts
    "legal_knowledge_base": str,          # raw text extracted from the uploaded PDF
    "user_query": str,                    # latest text typed into the question box
    "answer": str,                        # latest answer shown to the user
    "vector_db": lambda: None,            # FAISS index, built after an upload
    "summary": str,                       # summary of the uploaded document
    "doc_hash": str,                      # SHA-256 of the last processed upload
}
for _state_key, _make_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()
+# ========================== HELPER FUNCTIONS ========================== #
def compute_file_hash(file):
    """Return the SHA-256 hex digest of the full contents of ``file``.

    Used to detect whether the uploaded document changed between runs.

    Args:
        file: A seekable binary file-like object (supports ``read``/``seek``).

    Returns:
        The hexadecimal SHA-256 digest of the whole stream.

    The stream is rewound before hashing so that a partially consumed file
    still hashes its complete contents, and it is always rewound afterwards
    so later readers start from the beginning.
    """
    hasher = hashlib.sha256()
    file.seek(0)
    try:
        # Hash in fixed-size chunks instead of materialising the whole file.
        for chunk in iter(lambda: file.read(1 << 20), b""):
            hasher.update(chunk)
    finally:
        file.seek(0)  # Reset file pointer even if reading failed
    return hasher.hexdigest()
def extract_text_from_pdf(pdf_file):
    """Extract the plain text of every page of a PDF using PyMuPDF (fitz).

    Args:
        pdf_file: A seekable binary file-like object containing the PDF.

    Returns:
        The page texts joined by newlines and stripped. If the PDF yields no
        text, the string ``"No extractable text found in PDF."`` is returned.
        If anything goes wrong, ``"Error reading PDF: <reason>"`` is returned
        instead of raising (deliberate best-effort behaviour).
    """
    try:
        pdf_file.seek(0)  # Read the whole file regardless of prior reads
        try:
            pdf_bytes = pdf_file.read()
        finally:
            pdf_file.seek(0)  # Reset file pointer even when reading fails
        # The context manager closes the document once the text is collected.
        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
            text = "\n".join(page.get_text("text") for page in doc)
    except Exception as e:
        # Broad on purpose: callers expect an error string, never an exception.
        return f"Error reading PDF: {e}"
    text = text.strip()
    return text if text else "No extractable text found in PDF."
def summarize_text(text, model="llama3:8b"):
    """Summarize the extracted legal document with an Ollama-hosted LLM.

    Args:
        text: The document text to summarize.
        model: Name of the Ollama model to use. Defaults to ``"llama3:8b"``,
            the model this function previously hard-coded.

    Returns:
        The summary produced by the model, or an empty string when ``text``
        is empty or whitespace-only (no model call is made in that case).
    """
    # Nothing to summarize: skip the model round-trip entirely.
    if not text or not text.strip():
        return ""
    llm = Ollama(model=model)
    prompt = PromptTemplate(
        input_variables=["text"],
        template="Summarize this tax policy document concisely:\n{text}"
    )
    chain = LLMChain(llm=llm, prompt=prompt)
    # NOTE(review): the whole document is sent in a single prompt; very long
    # documents may exceed the model's context window - confirm.
    return chain.run(text=text)
def create_vector_db():
    """Build a FAISS vector index from the document stored in session state.

    Reads ``st.session_state.legal_knowledge_base``, splits it on newlines
    into chunks of up to 1000 characters with 150 characters of overlap,
    embeds the chunks with ``OllamaEmbeddings(model="llama3")`` and indexes
    them.

    Returns:
        The FAISS index, or ``None`` when there is no text to index.
    """
    text = st.session_state.legal_knowledge_base
    # Whitespace-only text would produce no usable chunks, so treat it as empty.
    if not text or not text.strip():
        return None
    text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=150)
    texts = text_splitter.split_text(text)
    # Guard against an empty chunk list: FAISS.from_texts is not expected to
    # cope with zero texts (NOTE(review): confirm against the installed version).
    if not texts:
        return None
    embeddings = OllamaEmbeddings(model="llama3")
    return FAISS.from_texts(texts, embeddings)
def retrieve_relevant_text(query, vector_db, k=5):
    """Fetch the document sections most similar to the user's query.

    Args:
        query: The user's question.
        vector_db: A vector store exposing ``similarity_search(query, k=...)``
            whose results carry a ``page_content`` attribute, or ``None``
            when no document has been indexed.
        k: Maximum number of sections to retrieve. Defaults to 5, the value
            that was previously hard-coded.

    Returns:
        The retrieved sections joined by newlines, or the string
        ``"No legal document uploaded."`` when ``vector_db`` is ``None``.
    """
    # Explicit None check: do not rely on the truthiness of the store object.
    if vector_db is None:
        return "No legal document uploaded."
    docs = vector_db.similarity_search(query, k=k)
    return "\n".join(doc.page_content for doc in docs)
+# ========================== AI TAX COMPUTATION & REASONING ========================== #
def compute_tax_details(query):
    """Compute a flat-rate tax when the query contains an income and a rate.

    The query is scanned for numbers. The first number followed by ``%`` is
    taken as the tax rate; the first number NOT followed by ``%`` is taken
    as the income. Thousands separators (``5,00,000``) and decimals
    (``7.5%``) are accepted.

    Args:
        query: The user's free-text question.

    Returns:
        A formatted sentence with the computed tax, or ``None`` when the
        query does not contain both an income and a percentage rate.

    Note:
        This is a plain heuristic: any bare number in the query (a year, a
        section number) may be picked up as the income.
    """
    import re

    income = None
    tax_rate = None
    # One pass over every number, remembering whether it carries a '%' sign.
    # This keeps the percentage from being mistaken for the income when it
    # appears first, e.g. "10% of 500000".
    for match in re.finditer(r"(\d[\d,]*(?:\.\d+)?)\s*(%?)", query):
        value = float(match.group(1).replace(",", ""))
        if match.group(2):
            if tax_rate is None:
                tax_rate = value
        elif income is None:
            income = value
    if income is None or tax_rate is None:
        return None
    computed_tax = round(income * (tax_rate / 100), 2)
    return f"Based on an income of ₹{income:,.2f} and a tax rate of {tax_rate}%, the calculated tax is **₹{computed_tax:,.2f}.**"
def answer_user_query(query):
    """Answer a user query and record the exchange in session state.

    If ``compute_tax_details`` can compute a result from the query, that
    result is the answer. Otherwise the most relevant document sections are
    retrieved from ``st.session_state.vector_db`` and passed, together with
    the query, to the ``llama3:8b`` Ollama model.

    Side effects:
        Sets ``st.session_state.answer`` and appends a
        ``{"query", "response"}`` entry to ``st.session_state.chat_history``.
        When no document has been indexed and no tax computation applies, an
        error is shown, the answer is left empty and nothing is appended.

    Args:
        query: The user's question.
    """
    # Drop the previous answer first so a stale response is never shown next
    # to the "upload a document" error below.
    st.session_state.answer = ""

    response = compute_tax_details(query)
    if not response:
        if st.session_state.vector_db is None:
            st.error("Please upload a document first.")
            return
        llm = Ollama(model="llama3:8b")
        retrieved_text = retrieve_relevant_text(query, st.session_state.vector_db)
        combined_context = f"Laws:\n{retrieved_text}\n\nUser Query:\n{query}"
        prompt_template = PromptTemplate(
            input_variables=["input_text"],
            template="""
        You are an AI legal expert specializing in tax and finance. Answer the user's query using legal context & real-world tax computation.
        Context:
        {input_text}
        """
        )
        # NOTE(review): memory is attached to the chain, but the template has
        # no history placeholder, so earlier turns are presumably recorded
        # without being injected into the prompt - confirm.
        chain = LLMChain(llm=llm, prompt=prompt_template, memory=st.session_state.memory)
        response = chain.run(input_text=combined_context)

    # Single place where the answer is published and logged.
    st.session_state.answer = response
    st.session_state.chat_history.append({"query": query, "response": response})
+# ========================== MAIN STREAMLIT APP ========================== #
def main():
    """Render the Streamlit UI: PDF upload, document summary and Q&A box.

    A newly uploaded PDF (detected by comparing its SHA-256 with
    ``st.session_state.doc_hash``) is extracted, summarized and indexed.
    The hash is only recorded once that pipeline has completed, so a run
    that fails part-way is retried on the next rerun instead of being
    treated as already processed.
    """
    st.title("📜 AI Legal Tax Assistant")
    uploaded_file = st.file_uploader("📄 Upload Policy PDF", type=["pdf"])
    if uploaded_file:
        file_hash = compute_file_hash(uploaded_file)
        if file_hash != st.session_state.doc_hash:
            with st.spinner("Extracting text..."):
                extracted_text = extract_text_from_pdf(uploaded_file)
            # extract_text_from_pdf reports failures as plain strings; do not
            # summarize or index those as if they were document content.
            extraction_failed = (
                extracted_text.startswith("Error reading PDF:")
                or extracted_text == "No extractable text found in PDF."
            )
            if extraction_failed:
                st.error(extracted_text)
            else:
                st.session_state.legal_knowledge_base = extracted_text
                st.success("Policy Document Uploaded & Stored!")
                with st.spinner("Generating summary..."):
                    st.session_state.summary = summarize_text(extracted_text)
                with st.spinner("Indexing document for Q&A..."):
                    st.session_state.vector_db = create_vector_db()
                # Mark the document as processed only after every step ran.
                st.session_state.doc_hash = file_hash
                st.success("Document indexed! Now you can ask questions.")
        # Rendered outside the "new upload" branch so the summary is still
        # visible on later reruns (e.g. after pressing "Ask").
        if st.session_state.summary:
            st.subheader("📄 Document Summary:")
            st.text_area(
                "Document summary",
                st.session_state.summary,
                height=250,
                label_visibility="collapsed",  # non-empty label, hidden in the UI
            )
    st.subheader("💬 Ask Questions:")
    st.session_state.user_query = st.text_input("Enter your question:")
    if st.button("Ask") and st.session_state.user_query.strip():
        with st.spinner("Thinking..."):
            answer_user_query(st.session_state.user_query)
    if st.session_state.answer:
        st.markdown("### 🤖 AI Response:")
        st.success(st.session_state.answer)


if __name__ == "__main__":
    main()