Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| from io import BytesIO | |
| import os | |
| import json | |
| from datetime import datetime | |
| import firebase_admin | |
| from firebase_admin import credentials, firestore | |
| from dar_processor import preprocess_pdf_text | |
| from gemini_utils import get_structured_data_with_gemini, get_harmonised_titles | |
| from models import ParsedDARReport, HarmonisedPara | |
# Firebase setup
# Credentials come from the FIREBASE_CREDENTIALS environment variable (a JSON
# document) when it is set; otherwise from a local firebase.json file.
FIREBASE_CREDENTIALS = os.environ.get("FIREBASE_CREDENTIALS")

if not FIREBASE_CREDENTIALS:
    # No environment secret: fall back to the on-disk service-account file.
    if not os.path.exists("firebase.json"):
        raise ValueError("firebase.json not found and FIREBASE_CREDENTIALS not set.")
    cred = credentials.Certificate("firebase.json")
else:
    # Preferred path: keep the key material out of the repository.
    service_account_info = json.loads(FIREBASE_CREDENTIALS)
    cred = credentials.Certificate(service_account_info)

firebase_admin.initialize_app(cred)

# Firestore handle and the collection holding one counter document per day.
db = firestore.client()
request_counts = db.collection('request_counts')
def get_request_count():
    """Return the number of requests recorded for the current UTC day.

    Looks up the document in ``request_counts`` whose id is today's date
    formatted as ``YYYY-MM-DD`` (UTC). Returns 0 when that document does not
    exist or has no ``count`` field.
    """
    day_key = datetime.utcnow().strftime('%Y-%m-%d')
    snapshot = request_counts.document(day_key).get()
    if not snapshot.exists:
        return 0
    return snapshot.to_dict().get('count', 0)
def check_request_limit():
    """Consume one request from today's quota if any remains.

    The counter lives in the ``request_counts`` document whose id is today's
    date (``YYYY-MM-DD``, UTC).

    Returns:
        tuple: ``(True, None)`` when the request is allowed (the counter has
        been incremented), or ``(False, message)`` when the stored count is
        already at or above the daily limit of 400.
    """
    today = datetime.utcnow().strftime('%Y-%m-%d')
    doc_ref = request_counts.document(today)
    doc = doc_ref.get()

    # A missing document, or one without a 'count' field, means no requests yet.
    count = (doc.to_dict() or {}).get('count', 0) if doc.exists else 0
    if count >= 400:
        return False, "Daily request limit of 400 reached. Try again tomorrow."

    # Create-or-increment in a single write. Previously a separate
    # set({'count': 0}) initialised the day, which could overwrite (reset) a
    # counter that a concurrent request had already incremented.
    # NOTE(review): the read above and this write are still not one
    # transaction, so concurrent requests right at the limit may overshoot it.
    doc_ref.set({'count': firestore.Increment(1)}, merge=True)
    return True, None
def create_html_report(results_with_harmonised: list[dict]) -> str:
    """Render the processed audit paras as a styled HTML table.

    Args:
        results_with_harmonised: One dict per audit para. The keys read are
            ``audit_para_number``, ``audit_para_heading``,
            ``harmonised_audit_para_heading`` and ``revenue_involved_lakhs_rs``.

    Returns:
        An HTML string: a ``<style>`` block followed by the table, or a short
        ``<p>`` message when the input is empty.

    Missing or ``None`` text fields are shown as ``N/A``; a missing or ``None``
    amount is shown as 0.00. All cell text is HTML-escaped because the
    headings originate from the uploaded document.
    """
    # Local import: the file's top-level imports do not include the html module.
    from html import escape

    if not results_with_harmonised:
        return "<p>No audit paras found or processed.</p>"

    style = """
    <style>
    body { font-family: sans-serif; }
    .styled-table {
    border-collapse: collapse; margin: 25px 0; font-size: 0.9em;
    min-width: 400px; box-shadow: 0 0 20px rgba(0, 0, 0, 0.15);
    border-radius: 8px; overflow: hidden;
    }
    .styled-table thead tr { background-color: #009879; color: #ffffff; text-align: left; }
    .styled-table th, .styled-table td { padding: 12px 15px; border-bottom: 1px solid #dddddd; }
    .styled-table tbody tr:last-of-type { border-bottom: 2px solid #009879; }
    </style>
    """
    table_open = (
        f"{style}<table class='styled-table'><thead><tr><th>Para No.</th>"
        "<th>Original Audit Para Heading</th><th>Harmonised Audit Para Heading</th>"
        "<th>Amount Involved (in Lakhs)</th></tr></thead><tbody>"
    )

    def _cell(value) -> str:
        # A key that is present with value None must not render as "None".
        return escape(str('N/A' if value is None else value))

    rows = []
    for item in results_with_harmonised:
        revenue = item.get('revenue_involved_lakhs_rs')
        if revenue is None:
            # dict.get's default only covers a missing key; an explicit None
            # would otherwise make the numeric format below raise TypeError.
            revenue = 0.0
        amount = f"₹{revenue:,.2f} L"
        rows.append(
            f"<tr><td>{_cell(item.get('audit_para_number'))}</td>"
            f"<td>{_cell(item.get('audit_para_heading'))}</td>"
            f"<td>{_cell(item.get('harmonised_audit_para_heading'))}</td>"
            f"<td>{amount}</td></tr>"
        )

    return f"{table_open}{''.join(rows)}</tbody></table>"
def process_dar_pdf(pdf_file):
    """Process an uploaded DAR PDF and build the HTML and Excel outputs.

    Args:
        pdf_file: The value delivered by the ``gr.File`` upload component;
            its ``.name`` attribute is passed to ``preprocess_pdf_text``.

    Returns:
        A 4-tuple ``(status_message, html_report, excel_file, request_count_text)``.
        On any failure the HTML and Excel entries are ``None`` and the status
        message describes the problem.

    The request quota is only consumed once a file and the Gemini API key are
    both present, so an empty submission or a misconfigured Space does not use
    up the daily limit.
    """
    # Local import: the file's top-level imports do not include tempfile.
    import tempfile

    def _respond(status, html_report=None, excel_file=None):
        # Every exit path reports the current counter alongside its result.
        return status, html_report, excel_file, f"Requests today: {get_request_count()}/400"

    # Cheap validation first, before touching the quota.
    if not pdf_file:
        return _respond("Please upload a PDF file.")
    gemini_api_key = os.environ.get("GEMINI_API_KEY")
    if not gemini_api_key:
        return _respond("Error: GEMINI_API_KEY secret not found in Space settings.")

    # Check (and consume) the request limit before processing
    can_process, error_msg = check_request_limit()
    if not can_process:
        return _respond(error_msg)

    # Step 1: Process PDF to text
    full_text = preprocess_pdf_text(pdf_file.name)
    if full_text.startswith("Error"):
        return _respond(f"Failed to process PDF: {full_text}")

    # Step 2: Extract structured data
    parsed_report = get_structured_data_with_gemini(gemini_api_key, full_text)
    if parsed_report.parsing_errors or not parsed_report.audit_paras:
        return _respond(parsed_report.parsing_errors or "Could not find any audit paras.")

    # Step 3: Get harmonised titles
    original_headings = [
        p.audit_para_heading for p in parsed_report.audit_paras if p.audit_para_heading
    ]
    if not original_headings:
        return _respond("Found paras but no headings to harmonise.")
    harmonised_results = get_harmonised_titles(gemini_api_key, full_text, original_headings)
    if not harmonised_results:
        return _respond("Failed to generate harmonised titles.")

    # Step 4: Combine and prepare outputs
    harmonised_map = {
        item.original_heading: item.harmonised_heading for item in harmonised_results
    }
    header_info = parsed_report.header.dict() if parsed_report.header else {}
    final_data_list = []
    for para in parsed_report.audit_paras:
        # Para fields win over header fields on a key clash; `|` builds a new dict.
        combined_info = header_info | para.dict()
        combined_info['harmonised_audit_para_heading'] = harmonised_map.get(
            para.audit_para_heading, "N/A"
        )
        final_data_list.append(combined_info)
    html_output = create_html_report(final_data_list)

    # Step 5: Create Excel file for download
    excel_columns = [
        'gstin', 'trade_name', 'category', 'audit_group_number', 'audit_para_number',
        'audit_para_heading', 'harmonised_audit_para_heading', 'revenue_involved_lakhs_rs',
        'revenue_recovered_lakhs_rs', 'status_of_para', 'total_amount_detected_overall_rs',
        'total_amount_recovered_overall_rs',
    ]
    df = pd.DataFrame(final_data_list).reindex(columns=excel_columns).fillna('N/A')

    # Write each report into its own temporary directory. A fixed path in the
    # working directory would let concurrent requests overwrite one another's
    # file; the per-request directory keeps the download name unchanged.
    # NOTE(review): these directories are not removed here — confirm whether
    # periodic cleanup is needed for long-running deployments.
    report_dir = tempfile.mkdtemp(prefix="dar_report_")
    excel_file_name = os.path.join(report_dir, "dar_extraction_report.xlsx")
    df.to_excel(excel_file_name, index=False, sheet_name='DAR_Extraction')

    return _respond("Processing complete.", html_output, gr.File(value=excel_file_name))
# --- Gradio Interface Definition ---
# Two top-level columns share one page: a login form (shown first) and the main
# tool (hidden until login succeeds). The nesting below follows the order in
# which the components are declared.
with gr.Blocks(theme=gr.themes.Soft(), title="DAR Harmonisation Tool") as demo:
    # --- Login UI (visible initially) ---
    with gr.Column(visible=True) as login_ui:
        gr.Markdown("# Mumbai CGST Audit Officer Login")
        gr.Markdown("Please enter the credentials to access the tool.")
        with gr.Row():
            username_input = gr.Textbox(label="Username", placeholder="Enter your username")
            password_input = gr.Textbox(label="Password", type="password", placeholder="Enter your password")
        login_button = gr.Button("Login", variant="primary")
        login_error_msg = gr.Markdown(visible=False)

    # --- Main App UI (hidden initially) ---
    with gr.Column(visible=False) as main_app_ui:
        gr.Markdown("# DAR Draft Audit Report Harmonisation Tool")
        gr.Markdown("## Initiative by Mumbai Audit 1 Commissionerate")
        gr.Markdown(
            "Upload a Departmental Audit Report (DAR) in PDF format. The tool will process it and generate harmonised titles for Audit paras in accordance with GST law."
        )
        request_count_output = gr.Textbox(label="Requests Made Today", interactive=False, value="Requests today: 0/400")
        with gr.Row():
            with gr.Column(scale=1):
                pdf_input = gr.File(label="Upload DAR PDF", file_types=[".pdf"])
                submit_btn = gr.Button("Process Report", variant="primary")
            with gr.Column(scale=2):
                status_output = gr.Textbox(label="Processing Status", interactive=False)
                excel_output = gr.File(label="Download Excel Report")
        gr.Markdown("## Harmonised Audit Para Titles")
        html_output = gr.HTML()

        # The four outputs match the 4-tuple returned by process_dar_pdf.
        submit_btn.click(
            fn=process_dar_pdf,
            inputs=[pdf_input],
            outputs=[status_output, html_output, excel_output, request_count_output]
        )

    # --- Login Functionality ---
    def login(username, password):
        """Validate the submitted credentials and switch the visible UI.

        The expected values are read from the ``APP_USERNAME`` and
        ``APP_PASSWORD`` environment variables; if either is unset, login is
        always rejected.

        Returns:
            dict: Component -> ``gr.update(...)`` for ``login_ui``,
            ``main_app_ui``, ``login_error_msg`` and ``request_count_output``.
        """
        # Local import: the file's top-level imports do not include hmac.
        import hmac

        auth_username = os.environ.get("APP_USERNAME")
        auth_password = os.environ.get("APP_PASSWORD")

        def _matches(supplied, expected):
            # Constant-time comparison so response timing does not reveal how
            # much of a guess was correct. Encoding to bytes lets
            # compare_digest accept non-ASCII input.
            if expected is None:
                return False
            return hmac.compare_digest(
                (supplied or "").encode("utf-8"), expected.encode("utf-8")
            )

        # Evaluate both checks unconditionally (no short-circuit) before combining.
        username_ok = _matches(username, auth_username)
        password_ok = _matches(password, auth_password)
        is_valid_user = username_ok and password_ok

        # NOTE(review): login only toggles component visibility. Confirm whether
        # the processing handler is reachable without logging in (e.g. through
        # Gradio's API); if so, consider demo.launch(auth=...) instead.
        if is_valid_user:
            # Login successful: hide login UI, show main app, display request count
            request_count = get_request_count()
            return {
                login_ui: gr.update(visible=False),
                main_app_ui: gr.update(visible=True),
                login_error_msg: gr.update(visible=False),
                request_count_output: gr.update(value=f"Requests today: {request_count}/400")
            }

        # Login failed: keep login UI visible, show error message
        return {
            login_ui: gr.update(visible=True),
            main_app_ui: gr.update(visible=False),
            login_error_msg: gr.update(value="<p style='color:red;'>Invalid username or password.</p>", visible=True),
            request_count_output: gr.update(value="Requests today: 0/400")
        }

    login_button.click(
        login,
        inputs=[username_input, password_input],
        outputs=[login_ui, main_app_ui, login_error_msg, request_count_output]
    )

if __name__ == "__main__":
    demo.launch(debug=True)
# import gradio as gr
| # import pandas as pd | |
| # from io import BytesIO | |
| # import os | |
| # import json | |
| # from datetime import datetime | |
| # import firebase_admin | |
| # from firebase_admin import credentials, firestore | |
| # from dar_processor import preprocess_pdf_text | |
| # from gemini_utils import get_structured_data_with_gemini, get_harmonised_titles | |
| # from models import ParsedDARReport, HarmonisedPara | |
| # # Firebase setup | |
| # FIREBASE_CREDENTIALS = os.environ.get("FIREBASE_CREDENTIALS") | |
| # if FIREBASE_CREDENTIALS: | |
| # # Load credentials from environment variable (preferred for security) | |
| # cred = credentials.Certificate(json.loads(FIREBASE_CREDENTIALS)) | |
| # else: | |
| # # Fallback to reading from firebase.json file | |
| # if not os.path.exists("firebase.json"): | |
| # raise ValueError("firebase.json not found and FIREBASE_CREDENTIALS not set.") | |
| # cred = credentials.Certificate("firebase.json") | |
| # firebase_admin.initialize_app(cred) | |
| # db = firestore.client() | |
| # request_counts = db.collection('request_counts') | |
| # def get_request_count(): | |
| # """Retrieve the current request count for today.""" | |
| # today = datetime.utcnow().strftime('%Y-%m-%d') | |
| # doc_ref = request_counts.document(today) | |
| # doc = doc_ref.get() | |
| # count = doc.to_dict().get('count', 0) if doc.exists else 0 | |
| # return count | |
| # def check_request_limit(): | |
| # """Check if the request limit for the day has been reached.""" | |
| # today = datetime.utcnow().strftime('%Y-%m-%d') | |
| # doc_ref = request_counts.document(today) | |
| # doc = doc_ref.get() | |
| # if not doc.exists: | |
| # # Initialize counter for the new day | |
| # doc_ref.set({'count': 0}) | |
| # count = 0 | |
| # else: | |
| # count = doc.to_dict().get('count', 0) | |
| # if count >= 400: | |
| # return False, "Daily request limit of 400 reached. Try again tomorrow." | |
| # # Increment the counter | |
| # doc_ref.update({'count': firestore.Increment(1)}) | |
| # return True, None | |
| # def create_html_report(results_with_harmonised: list[dict]) -> str: | |
| # """Generates an HTML string to display the results in a styled table.""" | |
| # if not results_with_harmonised: | |
| # return "<p>No audit paras found or processed.</p>" | |
| # style = """ | |
| # <style> | |
| # body { font-family: sans-serif; } | |
| # .styled-table { | |
| # border-collapse: collapse; margin: 25px 0; font-size: 0.9em; | |
| # min-width: 400px; box-shadow: 0 0 20px rgba(0, 0, 0, 0.15); | |
| # border-radius: 8px; overflow: hidden; | |
| # } | |
| # .styled-table thead tr { background-color: #009879; color: #ffffff; text-align: left; } | |
| # .styled-table th, .styled-table td { padding: 12px 15px; border-bottom: 1px solid #dddddd; } | |
| # .styled-table tbody tr:last-of-type { border-bottom: 2px solid #009879; } | |
| # </style> | |
| # """ | |
| # html = f"{style}<table class='styled-table'><thead><tr><th>Para No.</th><th>Original Audit Para Heading</th><th>Harmonised Audit Para Heading</th><th>Amount Involved (in Lakhs)</th></tr></thead><tbody>" | |
| # for item in results_with_harmonised: | |
| # para_num = item.get('audit_para_number', 'N/A') | |
| # original_heading = item.get('audit_para_heading', 'N/A') | |
| # harmonised_heading = item.get('harmonised_audit_para_heading', 'N/A') | |
| # amount = f"₹{item.get('revenue_involved_lakhs_rs', 0.0):,.2f} L" | |
| # html += f"<tr><td>{para_num}</td><td>{original_heading}</td><td>{harmonised_heading}</td><td>{amount}</td></tr>" | |
| # html += "</tbody></table>" | |
| # return html | |
| # def process_dar_pdf(pdf_file): | |
| # """The main processing function, called after successful login.""" | |
| # # Check request limit before processing | |
| # can_process, error_msg = check_request_limit() | |
| # if not can_process: | |
| # return error_msg, None, None, f"Requests today: {get_request_count()}/400" | |
| # gemini_api_key = os.environ.get("GEMINI_API_KEY") | |
| # if not pdf_file: | |
| # return "Please upload a PDF file.", None, None, f"Requests today: {get_request_count()}/400" | |
| # if not gemini_api_key: | |
| # return "Error: GEMINI_API_KEY secret not found in Space settings.", None, None, f"Requests today: {get_request_count()}/400" | |
| # # Step 1: Process PDF to text | |
| # full_text = preprocess_pdf_text(pdf_file.name) | |
| # if full_text.startswith("Error"): | |
| # return f"Failed to process PDF: {full_text}", None, None, f"Requests today: {get_request_count()}/400" | |
| # # Step 2: Extract structured data | |
| # parsed_report = get_structured_data_with_gemini(gemini_api_key, full_text) | |
| # if parsed_report.parsing_errors or not parsed_report.audit_paras: | |
| # error_msg = parsed_report.parsing_errors or "Could not find any audit paras." | |
| # return error_msg, None, None, f"Requests today: {get_request_count()}/400" | |
| # # Step 3: Get harmonised titles | |
| # original_headings = [p.audit_para_heading for p in parsed_report.audit_paras if p.audit_para_heading] | |
| # if not original_headings: | |
| # return "Found paras but no headings to harmonise.", None, None, f"Requests today: {get_request_count()}/400" | |
| # harmonised_results = get_harmonised_titles(gemini_api_key, full_text, original_headings) | |
| # if not harmonised_results: | |
| # return "Failed to generate harmonised titles.", None, None, f"Requests today: {get_request_count()}/400" | |
| # # Step 4: Combine and prepare outputs | |
| # harmonised_map = {item.original_heading: item.harmonised_heading for item in harmonised_results} | |
| # final_data_list = [] | |
| # for para in parsed_report.audit_paras: | |
| # combined_info = (parsed_report.header.dict() if parsed_report.header else {}) | para.dict() | |
| # combined_info['harmonised_audit_para_heading'] = harmonised_map.get(para.audit_para_heading, "N/A") | |
| # final_data_list.append(combined_info) | |
| # html_output = create_html_report(final_data_list) | |
| # # Step 5: Create Excel file for download | |
| # df = pd.DataFrame(final_data_list) | |
| # excel_columns = [ | |
| # 'gstin', 'trade_name', 'category', 'audit_group_number', 'audit_para_number', | |
| # 'audit_para_heading', 'harmonised_audit_para_heading', 'revenue_involved_lakhs_rs', | |
| # 'revenue_recovered_lakhs_rs', 'status_of_para', 'total_amount_detected_overall_rs', | |
| # 'total_amount_recovered_overall_rs' | |
| # ] | |
| # df = df.reindex(columns=excel_columns).fillna('N/A') | |
| # output_excel = BytesIO() | |
| # df.to_excel(output_excel, index=False, sheet_name='DAR_Extraction') | |
| # output_excel.seek(0) | |
| # excel_file_name = "dar_extraction_report.xlsx" | |
| # with open(excel_file_name, "wb") as f: | |
| # f.write(output_excel.getbuffer()) | |
| # return "Processing complete.", html_output, gr.File(value=excel_file_name), f"Requests today: {get_request_count()}/400" | |
| # # --- Gradio Interface Definition --- | |
| # with gr.Blocks(theme=gr.themes.Soft(), title="DAR Harmonisation Tool") as demo: | |
| # # --- Login UI (visible initially) --- | |
| # with gr.Column(visible=True) as login_ui: | |
| # gr.Markdown("# Audit Officer Login") | |
| # gr.Markdown("Please enter the credentials to access the tool.") | |
| # with gr.Row(): | |
| # username_input = gr.Textbox(label="Username", placeholder="Enter your username") | |
| # password_input = gr.Textbox(label="Password", type="password", placeholder="Enter your password") | |
| # login_button = gr.Button("Login", variant="primary") | |
| # login_error_msg = gr.Markdown(visible=False) | |
| # # --- Main App UI (hidden initially) --- | |
| # with gr.Column(visible=False) as main_app_ui: | |
| # gr.Markdown("# DAR Draft Audit Report Harmonisation Tool") | |
| # gr.Markdown("## Initiative by Mumbai Audit 1 Commissionerate") | |
| # gr.Markdown( | |
| # "Upload a Observation letter to Taxpayer or Departmental Audit Report (DAR) in PDF format. The tool will process it and generate harmonised titles for Audit paras in accordance with GST law." | |
| # ) | |
| # request_count_output = gr.Textbox(label="Requests Made Today", interactive=False, value="Requests today: 0/400") | |
| # with gr.Row(): | |
| # with gr.Column(scale=1): | |
| # pdf_input = gr.File(label="Upload DAR PDF", file_types=[".pdf"]) | |
| # submit_btn = gr.Button("Process Report", variant="primary") | |
| # with gr.Column(scale=2): | |
| # status_output = gr.Textbox(label="Processing Status", interactive=False) | |
| # excel_output = gr.File(label="Download Excel Report") | |
| # gr.Markdown("## Harmonised Audit Para Titles") | |
| # html_output = gr.HTML() | |
| # submit_btn.click( | |
| # fn=process_dar_pdf, | |
| # inputs=[pdf_input], | |
| # outputs=[status_output, html_output, excel_output, request_count_output] | |
| # ) | |
| # # --- Login Functionality --- | |
| # def login(username, password): | |
| # """ | |
| # Checks user credentials against secrets. | |
| # For production, these are loaded from Hugging Face secrets. | |
| # """ | |
| # auth_username = os.environ.get("APP_USERNAME") | |
| # auth_password = os.environ.get("APP_PASSWORD") | |
| # is_valid_user = (username == auth_username and password == auth_password) | |
| # if is_valid_user: | |
| # # Login successful: hide login UI, show main app, display request count | |
| # request_count = get_request_count() | |
| # return { | |
| # login_ui: gr.update(visible=False), | |
| # main_app_ui: gr.update(visible=True), | |
| # login_error_msg: gr.update(visible=False), | |
| # request_count_output: gr.update(value=f"Requests today: {request_count}/400") | |
| # } | |
| # else: | |
| # # Login failed: keep login UI visible, show error message | |
| # return { | |
| # login_ui: gr.update(visible=True), | |
| # main_app_ui: gr.update(visible=False), | |
| # login_error_msg: gr.update(value="<p style='color:red;'>Invalid username or password.</p>", visible=True), | |
| # request_count_output: gr.update(value="Requests today: 0/400") | |
| # } | |
| # login_button.click( | |
| # login, | |
| # inputs=[username_input, password_input], | |
| # outputs=[login_ui, main_app_ui, login_error_msg, request_count_output] | |
| # ) | |
| # if __name__ == "__main__": | |
| # demo.launch(debug=True) | |
| # # import pandas as pd | |
| # # from io import BytesIO | |
| # # import os | |
| # # # These imports assume the other python files (dar_processor.py, etc.) are in the same directory. | |
| # # from dar_processor import preprocess_pdf_text | |
| # # from gemini_utils import get_structured_data_with_gemini, get_harmonised_titles | |
| # # from models import ParsedDARReport, HarmonisedPara | |
| # # def create_html_report(results_with_harmonised: list[dict]) -> str: | |
| # # """Generates an HTML string to display the results in a styled table.""" | |
| # # if not results_with_harmonised: | |
| # # return "<p>No audit paras found or processed.</p>" | |
| # # style = """ | |
| # # <style> | |
| # # body { font-family: sans-serif; } | |
| # # .styled-table { | |
| # # border-collapse: collapse; margin: 25px 0; font-size: 0.9em; | |
| # # min-width: 400px; box-shadow: 0 0 20px rgba(0, 0, 0, 0.15); | |
| # # border-radius: 8px; overflow: hidden; | |
| # # } | |
| # # .styled-table thead tr { background-color: #009879; color: #ffffff; text-align: left; } | |
| # # .styled-table th, .styled-table td { padding: 12px 15px; border-bottom: 1px solid #dddddd; } | |
| # # .styled-table tbody tr:last-of-type { border-bottom: 2px solid #009879; } | |
| # # </style> | |
| # # """ | |
| # # html = f"{style}<table class='styled-table'><thead><tr><th>Para No.</th><th>Original Audit Para Heading</th><th>Harmonised Audit Para Heading</th><th>Amount Involved (in Lakhs)</th></tr></thead><tbody>" | |
| # # for item in results_with_harmonised: | |
| # # para_num = item.get('audit_para_number', 'N/A') | |
| # # original_heading = item.get('audit_para_heading', 'N/A') | |
| # # harmonised_heading = item.get('harmonised_audit_para_heading', 'N/A') | |
| # # amount = f"₹{item.get('revenue_involved_lakhs_rs', 0.0):,.2f} L" | |
| # # html += f"<tr><td>{para_num}</td><td>{original_heading}</td><td>{harmonised_heading}</td><td>{amount}</td></tr>" | |
| # # html += "</tbody></table>" | |
| # # return html | |
| # # def process_dar_pdf(pdf_file): | |
| # # """The main processing function, called after successful login.""" | |
| # # gemini_api_key = os.environ.get("GEMINI_API_KEY") | |
| # # if not pdf_file: | |
| # # return "Please upload a PDF file.", None, None | |
| # # if not gemini_api_key: | |
| # # return "Error: GEMINI_API_KEY secret not found in Space settings.", None, None | |
| # # # Step 1: Process PDF to text | |
| # # full_text = preprocess_pdf_text(pdf_file.name) | |
| # # if full_text.startswith("Error"): | |
| # # return f"Failed to process PDF: {full_text}", None, None | |
| # # # Step 2: Extract structured data | |
| # # parsed_report = get_structured_data_with_gemini(gemini_api_key, full_text) | |
| # # if parsed_report.parsing_errors or not parsed_report.audit_paras: | |
| # # error_msg = parsed_report.parsing_errors or "Could not find any audit paras." | |
| # # return error_msg, None, None | |
| # # # Step 3: Get harmonised titles | |
| # # original_headings = [p.audit_para_heading for p in parsed_report.audit_paras if p.audit_para_heading] | |
| # # if not original_headings: | |
| # # return "Found paras but no headings to harmonise.", None, None | |
| # # harmonised_results = get_harmonised_titles(gemini_api_key, full_text, original_headings) | |
| # # if not harmonised_results: | |
| # # return "Failed to generate harmonised titles.", None, None | |
| # # # Step 4: Combine and prepare outputs | |
| # # harmonised_map = {item.original_heading: item.harmonised_heading for item in harmonised_results} | |
| # # final_data_list = [] | |
| # # for para in parsed_report.audit_paras: | |
| # # combined_info = (parsed_report.header.dict() if parsed_report.header else {}) | para.dict() | |
| # # combined_info['harmonised_audit_para_heading'] = harmonised_map.get(para.audit_para_heading, "N/A") | |
| # # final_data_list.append(combined_info) | |
| # # html_output = create_html_report(final_data_list) | |
| # # # Step 5: Create Excel file for download | |
| # # df = pd.DataFrame(final_data_list) | |
| # # excel_columns = [ | |
| # # 'gstin', 'trade_name', 'category', 'audit_group_number', 'audit_para_number', | |
| # # 'audit_para_heading', 'harmonised_audit_para_heading', 'revenue_involved_lakhs_rs', | |
| # # 'revenue_recovered_lakhs_rs', 'status_of_para', 'total_amount_detected_overall_rs', | |
| # # 'total_amount_recovered_overall_rs' | |
| # # ] | |
| # # df = df.reindex(columns=excel_columns).fillna('N/A') | |
| # # output_excel = BytesIO() | |
| # # df.to_excel(output_excel, index=False, sheet_name='DAR_Extraction') | |
| # # output_excel.seek(0) | |
| # # excel_file_name = "dar_extraction_report.xlsx" | |
| # # with open(excel_file_name, "wb") as f: | |
| # # f.write(output_excel.getbuffer()) | |
| # # return "Processing complete.", html_output, gr.File(value=excel_file_name) | |
| # # # --- Gradio Interface Definition --- | |
| # # with gr.Blocks(theme=gr.themes.Soft(), title="DAR Harmonisation Tool") as demo: | |
| # # # --- Login UI (visible initially) --- | |
| # # with gr.Column(visible=True) as login_ui: | |
| # # gr.Markdown("# Audit Officer Login") | |
| # # gr.Markdown("Please enter the credentials to access the tool.") | |
| # # with gr.Row(): | |
| # # username_input = gr.Textbox(label="Username", placeholder="Enter your username") | |
| # # password_input = gr.Textbox(label="Password", type="password", placeholder="Enter your password") | |
| # # login_button = gr.Button("Login", variant="primary") | |
| # # login_error_msg = gr.Markdown(visible=False) | |
| # # # --- Main App UI (hidden initially) --- | |
| # # with gr.Column(visible=False) as main_app_ui: | |
| # # gr.Markdown("# DAR Draft Audit Report Harmonisation Tool") | |
| # # gr.Markdown("## Initiative by Mumbai Audit 1 Commissionerate") | |
| # # gr.Markdown( | |
| # # "Upload a Observation letter to taxpayer or Departmental Audit Report (DAR) in PDF format. The tool will process it and generate harmonised titles for Audit paras in accordance with GST law." | |
| # # ) | |
| # # with gr.Row(): | |
| # # with gr.Column(scale=1): | |
| # # pdf_input = gr.File(label="Upload DAR PDF", file_types=[".pdf"]) | |
| # # submit_btn = gr.Button("Process Report", variant="primary") | |
| # # with gr.Column(scale=2): | |
| # # status_output = gr.Textbox(label="Processing Status", interactive=False) | |
| # # excel_output = gr.File(label="Download Excel Report") | |
| # # gr.Markdown("## Harmonised Audit Para Titles") | |
| # # html_output = gr.HTML() | |
| # # submit_btn.click( | |
| # # fn=process_dar_pdf, | |
| # # inputs=[pdf_input], | |
| # # outputs=[status_output, html_output, excel_output] | |
| # # ) | |
| # # # --- Login Functionality --- | |
| # # def login(username, password): | |
| # # """ | |
| # # Checks user credentials against secrets. | |
| # # For production, these are loaded from Hugging Face secrets. | |
| # # """ | |
| # # # Get credentials from Hugging Face secrets. | |
| # # # Fallback to default values for local testing if secrets are not set. | |
| # # auth_username = os.environ.get("APP_USERNAME") | |
| # # auth_password = os.environ.get("APP_PASSWORD") | |
| # # is_valid_user = (username == auth_username and password == auth_password) | |
| # # if is_valid_user: | |
| # # # Login successful: hide login UI, show main app | |
| # # return { | |
| # # login_ui: gr.update(visible=False), | |
| # # main_app_ui: gr.update(visible=True), | |
| # # login_error_msg: gr.update(visible=False) | |
| # # } | |
| # # else: | |
| # # # Login failed: keep login UI visible, show error message | |
| # # return { | |
| # # login_ui: gr.update(visible=True), | |
| # # main_app_ui: gr.update(visible=False), | |
| # # login_error_msg: gr.update(value="<p style='color:red;'>Invalid username or password.</p>", visible=True) | |
| # # } | |
| # # login_button.click( | |
| # # login, | |
| # # inputs=[username_input, password_input], | |
| # # outputs=[login_ui, main_app_ui, login_error_msg] | |
| # # ) | |
| # # if __name__ == "__main__": | |
| # # demo.launch(debug=True) | |