Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -13,7 +13,7 @@ from models import ParsedDARReport, HarmonisedPara
|
|
| 13 |
# Firebase setup
|
| 14 |
FIREBASE_CREDENTIALS = os.environ.get("FIREBASE_CREDENTIALS")
|
| 15 |
if FIREBASE_CREDENTIALS:
|
| 16 |
-
# Load credentials from environment variable (preferred)
|
| 17 |
cred = credentials.Certificate(json.loads(FIREBASE_CREDENTIALS))
|
| 18 |
else:
|
| 19 |
# Fallback to reading from firebase.json file
|
|
@@ -25,6 +25,14 @@ firebase_admin.initialize_app(cred)
|
|
| 25 |
db = firestore.client()
|
| 26 |
request_counts = db.collection('request_counts')
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
def check_request_limit():
|
| 29 |
"""Check if the request limit for the day has been reached."""
|
| 30 |
today = datetime.utcnow().strftime('%Y-%m-%d')
|
|
@@ -77,33 +85,33 @@ def process_dar_pdf(pdf_file):
|
|
| 77 |
# Check request limit before processing
|
| 78 |
can_process, error_msg = check_request_limit()
|
| 79 |
if not can_process:
|
| 80 |
-
return error_msg, None, None
|
| 81 |
|
| 82 |
gemini_api_key = os.environ.get("GEMINI_API_KEY")
|
| 83 |
if not pdf_file:
|
| 84 |
-
return "Please upload a PDF file.", None, None
|
| 85 |
if not gemini_api_key:
|
| 86 |
-
return "Error: GEMINI_API_KEY secret not found in Space settings.", None, None
|
| 87 |
|
| 88 |
# Step 1: Process PDF to text
|
| 89 |
full_text = preprocess_pdf_text(pdf_file.name)
|
| 90 |
if full_text.startswith("Error"):
|
| 91 |
-
return f"Failed to process PDF: {full_text}", None, None
|
| 92 |
|
| 93 |
# Step 2: Extract structured data
|
| 94 |
parsed_report = get_structured_data_with_gemini(gemini_api_key, full_text)
|
| 95 |
if parsed_report.parsing_errors or not parsed_report.audit_paras:
|
| 96 |
error_msg = parsed_report.parsing_errors or "Could not find any audit paras."
|
| 97 |
-
return error_msg, None, None
|
| 98 |
|
| 99 |
# Step 3: Get harmonised titles
|
| 100 |
original_headings = [p.audit_para_heading for p in parsed_report.audit_paras if p.audit_para_heading]
|
| 101 |
if not original_headings:
|
| 102 |
-
return "Found paras but no headings to harmonise.", None, None
|
| 103 |
|
| 104 |
harmonised_results = get_harmonised_titles(gemini_api_key, full_text, original_headings)
|
| 105 |
if not harmonised_results:
|
| 106 |
-
return "Failed to generate harmonised titles.", None, None
|
| 107 |
|
| 108 |
# Step 4: Combine and prepare outputs
|
| 109 |
harmonised_map = {item.original_heading: item.harmonised_heading for item in harmonised_results}
|
|
@@ -133,14 +141,14 @@ def process_dar_pdf(pdf_file):
|
|
| 133 |
with open(excel_file_name, "wb") as f:
|
| 134 |
f.write(output_excel.getbuffer())
|
| 135 |
|
| 136 |
-
return "Processing complete.", html_output, gr.File(value=excel_file_name)
|
| 137 |
|
| 138 |
# --- Gradio Interface Definition ---
|
| 139 |
with gr.Blocks(theme=gr.themes.Soft(), title="DAR Harmonisation Tool") as demo:
|
| 140 |
|
| 141 |
# --- Login UI (visible initially) ---
|
| 142 |
with gr.Column(visible=True) as login_ui:
|
| 143 |
-
gr.Markdown("#
|
| 144 |
gr.Markdown("Please enter the credentials to access the tool.")
|
| 145 |
with gr.Row():
|
| 146 |
username_input = gr.Textbox(label="Username", placeholder="Enter your username")
|
|
@@ -153,8 +161,9 @@ with gr.Blocks(theme=gr.themes.Soft(), title="DAR Harmonisation Tool") as demo:
|
|
| 153 |
gr.Markdown("# DAR Draft Audit Report Harmonisation Tool")
|
| 154 |
gr.Markdown("## Initiative by Mumbai Audit 1 Commissionerate")
|
| 155 |
gr.Markdown(
|
| 156 |
-
"Upload a Departmental Audit Report (DAR) in PDF format. The tool will process it and generate harmonised titles for Audit paras in accordance with GST law."
|
| 157 |
)
|
|
|
|
| 158 |
with gr.Row():
|
| 159 |
with gr.Column(scale=1):
|
| 160 |
pdf_input = gr.File(label="Upload DAR PDF", file_types=[".pdf"])
|
|
@@ -168,7 +177,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="DAR Harmonisation Tool") as demo:
|
|
| 168 |
submit_btn.click(
|
| 169 |
fn=process_dar_pdf,
|
| 170 |
inputs=[pdf_input],
|
| 171 |
-
outputs=[status_output, html_output, excel_output]
|
| 172 |
)
|
| 173 |
|
| 174 |
# --- Login Functionality ---
|
|
@@ -183,26 +192,31 @@ with gr.Blocks(theme=gr.themes.Soft(), title="DAR Harmonisation Tool") as demo:
|
|
| 183 |
is_valid_user = (username == auth_username and password == auth_password)
|
| 184 |
|
| 185 |
if is_valid_user:
|
|
|
|
|
|
|
| 186 |
return {
|
| 187 |
login_ui: gr.update(visible=False),
|
| 188 |
main_app_ui: gr.update(visible=True),
|
| 189 |
-
login_error_msg: gr.update(visible=False)
|
|
|
|
| 190 |
}
|
| 191 |
else:
|
|
|
|
| 192 |
return {
|
| 193 |
login_ui: gr.update(visible=True),
|
| 194 |
main_app_ui: gr.update(visible=False),
|
| 195 |
-
login_error_msg: gr.update(value="<p style='color:red;'>Invalid username or password.</p>", visible=True)
|
|
|
|
| 196 |
}
|
| 197 |
|
| 198 |
login_button.click(
|
| 199 |
login,
|
| 200 |
inputs=[username_input, password_input],
|
| 201 |
-
outputs=[login_ui, main_app_ui, login_error_msg]
|
| 202 |
)
|
| 203 |
|
| 204 |
if __name__ == "__main__":
|
| 205 |
-
demo.launch(debug=True)
|
| 206 |
# import pandas as pd
|
| 207 |
# from io import BytesIO
|
| 208 |
# import os
|
|
|
|
| 13 |
# Firebase setup
|
| 14 |
FIREBASE_CREDENTIALS = os.environ.get("FIREBASE_CREDENTIALS")
|
| 15 |
if FIREBASE_CREDENTIALS:
|
| 16 |
+
# Load credentials from environment variable (preferred for security)
|
| 17 |
cred = credentials.Certificate(json.loads(FIREBASE_CREDENTIALS))
|
| 18 |
else:
|
| 19 |
# Fallback to reading from firebase.json file
|
|
|
|
| 25 |
db = firestore.client()
|
| 26 |
request_counts = db.collection('request_counts')
|
| 27 |
|
| 28 |
+
def get_request_count():
    """Return the number of requests recorded for the current UTC day.

    The counter is read from the ``request_counts`` collection, using the
    current UTC date formatted as ``YYYY-MM-DD`` as the document id.
    If that document does not exist, or it has no ``count`` field, the
    result is 0.
    """
    date_key = datetime.utcnow().strftime('%Y-%m-%d')
    snapshot = request_counts.document(date_key).get()
    # No document for today means nothing has been counted yet.
    if not snapshot.exists:
        return 0
    return snapshot.to_dict().get('count', 0)
|
| 35 |
+
|
| 36 |
def check_request_limit():
|
| 37 |
"""Check if the request limit for the day has been reached."""
|
| 38 |
today = datetime.utcnow().strftime('%Y-%m-%d')
|
|
|
|
| 85 |
# Check request limit before processing
|
| 86 |
can_process, error_msg = check_request_limit()
|
| 87 |
if not can_process:
|
| 88 |
+
return error_msg, None, None, f"Requests today: {get_request_count()}/400"
|
| 89 |
|
| 90 |
gemini_api_key = os.environ.get("GEMINI_API_KEY")
|
| 91 |
if not pdf_file:
|
| 92 |
+
return "Please upload a PDF file.", None, None, f"Requests today: {get_request_count()}/400"
|
| 93 |
if not gemini_api_key:
|
| 94 |
+
return "Error: GEMINI_API_KEY secret not found in Space settings.", None, None, f"Requests today: {get_request_count()}/400"
|
| 95 |
|
| 96 |
# Step 1: Process PDF to text
|
| 97 |
full_text = preprocess_pdf_text(pdf_file.name)
|
| 98 |
if full_text.startswith("Error"):
|
| 99 |
+
return f"Failed to process PDF: {full_text}", None, None, f"Requests today: {get_request_count()}/400"
|
| 100 |
|
| 101 |
# Step 2: Extract structured data
|
| 102 |
parsed_report = get_structured_data_with_gemini(gemini_api_key, full_text)
|
| 103 |
if parsed_report.parsing_errors or not parsed_report.audit_paras:
|
| 104 |
error_msg = parsed_report.parsing_errors or "Could not find any audit paras."
|
| 105 |
+
return error_msg, None, None, f"Requests today: {get_request_count()}/400"
|
| 106 |
|
| 107 |
# Step 3: Get harmonised titles
|
| 108 |
original_headings = [p.audit_para_heading for p in parsed_report.audit_paras if p.audit_para_heading]
|
| 109 |
if not original_headings:
|
| 110 |
+
return "Found paras but no headings to harmonise.", None, None, f"Requests today: {get_request_count()}/400"
|
| 111 |
|
| 112 |
harmonised_results = get_harmonised_titles(gemini_api_key, full_text, original_headings)
|
| 113 |
if not harmonised_results:
|
| 114 |
+
return "Failed to generate harmonised titles.", None, None, f"Requests today: {get_request_count()}/400"
|
| 115 |
|
| 116 |
# Step 4: Combine and prepare outputs
|
| 117 |
harmonised_map = {item.original_heading: item.harmonised_heading for item in harmonised_results}
|
|
|
|
| 141 |
with open(excel_file_name, "wb") as f:
|
| 142 |
f.write(output_excel.getbuffer())
|
| 143 |
|
| 144 |
+
return "Processing complete.", html_output, gr.File(value=excel_file_name), f"Requests today: {get_request_count()}/400"
|
| 145 |
|
| 146 |
# --- Gradio Interface Definition ---
|
| 147 |
with gr.Blocks(theme=gr.themes.Soft(), title="DAR Harmonisation Tool") as demo:
|
| 148 |
|
| 149 |
# --- Login UI (visible initially) ---
|
| 150 |
with gr.Column(visible=True) as login_ui:
|
| 151 |
+
gr.Markdown("# Audit Officer Login")
|
| 152 |
gr.Markdown("Please enter the credentials to access the tool.")
|
| 153 |
with gr.Row():
|
| 154 |
username_input = gr.Textbox(label="Username", placeholder="Enter your username")
|
|
|
|
| 161 |
gr.Markdown("# DAR Draft Audit Report Harmonisation Tool")
|
| 162 |
gr.Markdown("## Initiative by Mumbai Audit 1 Commissionerate")
|
| 163 |
gr.Markdown(
|
| 164 |
+
"Upload a Observation letter to Taxpayer or Departmental Audit Report (DAR) in PDF format. The tool will process it and generate harmonised titles for Audit paras in accordance with GST law."
|
| 165 |
)
|
| 166 |
+
request_count_output = gr.Textbox(label="Requests Made Today", interactive=False, value="Requests today: 0/400")
|
| 167 |
with gr.Row():
|
| 168 |
with gr.Column(scale=1):
|
| 169 |
pdf_input = gr.File(label="Upload DAR PDF", file_types=[".pdf"])
|
|
|
|
| 177 |
submit_btn.click(
|
| 178 |
fn=process_dar_pdf,
|
| 179 |
inputs=[pdf_input],
|
| 180 |
+
outputs=[status_output, html_output, excel_output, request_count_output]
|
| 181 |
)
|
| 182 |
|
| 183 |
# --- Login Functionality ---
|
|
|
|
| 192 |
is_valid_user = (username == auth_username and password == auth_password)
|
| 193 |
|
| 194 |
if is_valid_user:
|
| 195 |
+
# Login successful: hide login UI, show main app, display request count
|
| 196 |
+
request_count = get_request_count()
|
| 197 |
return {
|
| 198 |
login_ui: gr.update(visible=False),
|
| 199 |
main_app_ui: gr.update(visible=True),
|
| 200 |
+
login_error_msg: gr.update(visible=False),
|
| 201 |
+
request_count_output: gr.update(value=f"Requests today: {request_count}/400")
|
| 202 |
}
|
| 203 |
else:
|
| 204 |
+
# Login failed: keep login UI visible, show error message
|
| 205 |
return {
|
| 206 |
login_ui: gr.update(visible=True),
|
| 207 |
main_app_ui: gr.update(visible=False),
|
| 208 |
+
login_error_msg: gr.update(value="<p style='color:red;'>Invalid username or password.</p>", visible=True),
|
| 209 |
+
request_count_output: gr.update(value="Requests today: 0/400")
|
| 210 |
}
|
| 211 |
|
| 212 |
login_button.click(
|
| 213 |
login,
|
| 214 |
inputs=[username_input, password_input],
|
| 215 |
+
outputs=[login_ui, main_app_ui, login_error_msg, request_count_output]
|
| 216 |
)
|
| 217 |
|
| 218 |
if __name__ == "__main__":
|
| 219 |
+
demo.launch(debug=True)
|
| 220 |
# import pandas as pd
|
| 221 |
# from io import BytesIO
|
| 222 |
# import os
|