fragger246 committed on
Commit
4a73579
·
verified ·
1 Parent(s): d70a72c

Upload taxagent.py

Browse files
Files changed (1) hide show
  1. taxagent.py +170 -0
taxagent.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import fitz # PyMuPDF for PDF extraction
3
+ from langchain_community.llms import Ollama
4
+ from langchain.chains import LLMChain
5
+ from langchain.prompts import PromptTemplate
6
+ from langchain.memory import ConversationBufferMemory
7
+ from langchain.text_splitter import CharacterTextSplitter
8
+ from langchain.vectorstores import FAISS
9
+ from langchain.embeddings import OllamaEmbeddings
10
+ import hashlib
11
+ import numpy as np
12
+
13
# ========================== SESSION STATE INITIALIZATION ========================== #

# Seed every session-state key exactly once per browser session.
# Factories (not ready-made values) are stored so ConversationBufferMemory
# is only constructed when its key is actually missing.
for _key, _default_factory in (
    ("memory", ConversationBufferMemory),
    ("chat_history", list),          # [] — prior {query, response} turns
    ("legal_knowledge_base", str),   # "" — raw text extracted from the PDF
    ("user_query", str),             # "" — contents of the question box
    ("answer", str),                 # "" — latest AI response
    ("vector_db", lambda: None),     # FAISS index once a document is loaded
    ("summary", str),                # "" — AI-generated document summary
    ("doc_hash", str),               # "" — SHA-256 of the active upload
):
    if _key not in st.session_state:
        st.session_state[_key] = _default_factory()
31
+
32
+ # ========================== HELPER FUNCTIONS ========================== #
33
+
34
def compute_file_hash(file):
    """Compute the SHA-256 hex digest of an uploaded file-like object.

    Reads the stream in fixed-size chunks so arbitrarily large uploads are
    never held in memory all at once, then rewinds the stream so later
    consumers (e.g. the PDF extractor) can read it from the start.

    Args:
        file: A binary file-like object supporting ``read()`` and ``seek()``.

    Returns:
        str: The SHA-256 digest as a lowercase hex string.
    """
    hasher = hashlib.sha256()
    # Chunked read instead of file.read(): same digest, bounded memory.
    for chunk in iter(lambda: file.read(65536), b""):
        hasher.update(chunk)
    file.seek(0)  # Reset file pointer after reading
    return hasher.hexdigest()
40
+
41
def extract_text_from_pdf(pdf_file):
    """Extract plain text from an uploaded PDF using PyMuPDF (fitz).

    Args:
        pdf_file: A binary file-like object containing the PDF bytes.

    Returns:
        str: The concatenated text of all pages, a placeholder message when
        the PDF contains no extractable text, or an error description when
        parsing fails.
    """
    try:
        data = pdf_file.read()
        pdf_file.seek(0)  # Reset file pointer so the caller can reuse the stream
        # Context manager closes the document handle; the original leaked it.
        with fitz.open(stream=data, filetype="pdf") as doc:
            text = "\n".join(page.get_text("text") for page in doc)
        return text.strip() if text.strip() else "No extractable text found in PDF."
    except Exception as e:
        # Best-effort: report the failure as a string so the Streamlit UI
        # shows something instead of crashing on a corrupt upload.
        return f"Error reading PDF: {e}"
50
+
51
def summarize_text(text):
    """Produce a concise AI-generated summary of the extracted document.

    Args:
        text: Raw text pulled from the uploaded tax policy PDF.

    Returns:
        str: The model's summary of the document.
    """
    summarizer = LLMChain(
        llm=Ollama(model="llama3:8b"),
        prompt=PromptTemplate(
            input_variables=["text"],
            template="Summarize this tax policy document concisely:\n{text}",
        ),
    )
    return summarizer.run(text=text)
61
+
62
def create_vector_db():
    """Build a FAISS index over the stored document text for retrieval.

    Returns:
        The FAISS vector store, or None when no document text has been
        loaded into session state yet.
    """
    source_text = st.session_state.legal_knowledge_base
    if not source_text:
        return None

    splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=150)
    chunks = splitter.split_text(source_text)
    return FAISS.from_texts(chunks, OllamaEmbeddings(model="llama3"))
72
+
73
def retrieve_relevant_text(query, vector_db):
    """Return the document passages most similar to the user's query.

    Args:
        query: The user's natural-language question.
        vector_db: A FAISS store built by create_vector_db(), or None.

    Returns:
        str: Up to five matching chunks joined by newlines, or a notice
        string when no document has been indexed yet.
    """
    if not vector_db:
        return "No legal document uploaded."

    matches = vector_db.similarity_search(query, k=5)
    return "\n".join(doc.page_content for doc in matches)
81
+
82
+ # ========================== AI TAX COMPUTATION & REASONING ========================== #
83
+
84
def compute_tax_details(query):
    """Answer simple "income at rate%" tax questions deterministically.

    Parses an income figure and a percentage tax rate out of the query and,
    when both are present, returns the computed tax as a formatted message.
    Returns None when the query is not a computable request, so the caller
    can fall back to the retrieval/LLM path.

    Fixes over the original: the income regex previously matched the FIRST
    number in the query, so "10% tax on 500000" parsed income=10; the
    lookahead below rejects any number attached to a '%'. Decimal rates
    (e.g. "10.5%") are now also accepted.

    Args:
        query: The raw user question, possibly containing ₹ amounts with
            comma separators and a percentage rate.

    Returns:
        str | None: Formatted result, or None if either figure is missing.
    """
    import re

    clean = query.replace(",", "")  # strip digit separators once, up front

    # Rate: a number immediately (or space-) followed by '%'.
    tax_rate_match = re.search(r"(\d+(?:\.\d+)?)\s*%", clean)
    # Income: a number NOT attached to a '%'. The negative lookahead skips
    # any trailing digits/dot run that ends in '%', so the rate is never
    # mistaken for the income.
    income_match = re.search(r"₹?\s*(\d+(?:\.\d+)?)(?![\d.]*\s*%)", clean)

    if income_match and tax_rate_match:
        income = float(income_match.group(1))
        tax_rate = float(tax_rate_match.group(1))

        computed_tax = round(income * (tax_rate / 100), 2)
        return (
            f"Based on an income of ₹{income:,.2f} and a tax rate of "
            f"{tax_rate}%, the calculated tax is **₹{computed_tax:,.2f}.**"
        )

    return None
100
+
101
def answer_user_query(query):
    """Answer a user question, preferring direct tax math over the LLM.

    Resolution order:
      1. If the query contains an income figure and a % rate, answer with
         the deterministic computation from compute_tax_details (no LLM).
      2. Otherwise, retrieve relevant chunks from the indexed document and
         ask the Ollama model with that context.

    Side effects: writes the response into st.session_state.answer and
    appends a {"query", "response"} record to st.session_state.chat_history.
    """
    # Fast path: deterministic arithmetic beats an LLM at plain math.
    tax_computation_result = compute_tax_details(query)

    if tax_computation_result:
        st.session_state.answer = tax_computation_result
        st.session_state.chat_history.append({"query": query, "response": st.session_state.answer})
        return

    # The RAG path requires an indexed document; bail out with a UI error.
    if not st.session_state.vector_db:
        st.error("Please upload a document first.")
        return

    llm = Ollama(model="llama3:8b")
    retrieved_text = retrieve_relevant_text(query, st.session_state.vector_db)
    combined_context = f"Laws:\n{retrieved_text}\n\nUser Query:\n{query}"

    prompt_template = PromptTemplate(
        input_variables=["input_text"],
        template="""
You are an AI legal expert specializing in tax and finance. Answer the user's query using legal context & real-world tax computation.

Context:
{input_text}
"""
    )

    # ConversationBufferMemory carries prior turns so follow-up questions
    # keep their conversational context.
    chain = LLMChain(llm=llm, prompt=prompt_template, memory=st.session_state.memory)
    st.session_state.answer = chain.run(input_text=combined_context)
    st.session_state.chat_history.append({"query": query, "response": st.session_state.answer})
131
+
132
+ # ========================== MAIN STREAMLIT APP ========================== #
133
+
134
def main():
    """Streamlit entry point: upload, summarize, index, then Q&A."""
    st.title("📜 AI Legal Tax Assistant")

    pdf_upload = st.file_uploader("📄 Upload Policy PDF", type=["pdf"])

    if pdf_upload:
        digest = compute_file_hash(pdf_upload)

        # Re-process only when a different document arrives.
        if digest != st.session_state.doc_hash:
            st.session_state.doc_hash = digest
            with st.spinner("Extracting text..."):
                doc_text = extract_text_from_pdf(pdf_upload)
                st.session_state.legal_knowledge_base = doc_text
            st.success("Policy Document Uploaded & Stored!")

            with st.spinner("Generating summary..."):
                st.session_state.summary = summarize_text(doc_text)
            st.subheader("📄 Document Summary:")
            st.text_area("", st.session_state.summary, height=250)

            with st.spinner("Indexing document for Q&A..."):
                st.session_state.vector_db = create_vector_db()
            st.success("Document indexed! Now you can ask questions.")

    st.subheader("💬 Ask Questions:")
    st.session_state.user_query = st.text_input("Enter your question:")

    if st.button("Ask") and st.session_state.user_query.strip():
        with st.spinner("Thinking..."):
            answer_user_query(st.session_state.user_query)

    if st.session_state.answer:
        st.markdown("### 🤖 AI Response:")
        st.success(st.session_state.answer)
168
+
169
# Entry point when run directly (e.g. `streamlit run taxagent.py`).
if __name__ == "__main__":
    main()