# Spaces: Sleeping  (hosting-page status banner captured with the source; not program code)
| import gradio as gr | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import time | |
| import re | |
| import json | |
| from datetime import datetime | |
| from typing import List, Dict, Tuple, Optional | |
| import concurrent.futures | |
| from dataclasses import dataclass | |
| import logging | |
# Set up logging: configure the root logger at INFO and create this module's
# logger, which the rest of the file uses via `logger`.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@dataclass
class NameCombination:
    """One candidate split of a person's name across the form's name fields.

    Instances are created with keyword arguments and then updated in place
    with the outcome of the validation attempt, so the class must be a
    dataclass (a plain class with bare annotations has no such constructor).
    """

    # Human-readable label of the formatting strategy that produced this entry.
    description: str
    first_name: str
    middle_name: str
    last_name: str
    # Stays "Pending" until the combination has been tested; the rest of the
    # file then sets it to "Success", "Failed" or "Error".
    status: str = "Pending"
    # Message returned by the validator for this attempt.
    result: str = ""
    # Failure/exception text; empty when the attempt succeeded.
    error_message: str = ""
class TINValidatorWebScraper:
    """Client for the BIR TIN-validation web form.

    Keeps a ``requests.Session`` with browser-like headers, posts the
    validation form to ``submit_url`` and interprets the reply, which is
    parsed as JSON first and as HTML text when it is not valid JSON.
    """

    # Lower-case phrases that classify an HTML (non-JSON) reply.
    _SUCCESS_PHRASES = (
        'valid and registered', 'record matched', 'successfully verified',
        'tin provided is valid', 'validation successful',
    )
    _FAILURE_PHRASES = (
        'does not match', 'record not matched', 'cannot process',
        'not found', 'invalid information', 'regret to inform',
    )
    # Accepted birthdate layouts AFTER "-" has been normalised to "/".
    # Order matters: an ambiguous date such as 01/02/1990 is read month-first.
    _BIRTHDATE_FORMATS = ("%m/%d/%Y", "%d/%m/%Y", "%Y/%m/%d")

    def __init__(self):
        self.session = requests.Session()
        # Accept-Encoding is intentionally NOT overridden: advertising "br"
        # by hand can make the server answer with Brotli, which is only
        # decoded when an optional Brotli package is installed. The session's
        # default header already lists exactly what can be decoded.
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.9',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
        })
        # Page that hosts the form, and the endpoint the form posts to.
        self.base_url = "https://revie.bir.gov.ph/tin-validation"
        self.submit_url = "https://revie.bir.gov.ph/submitTinValidation"

    def get_initial_page(self) -> bool:
        """Load the form page so the session picks up any cookies.

        Returns:
            True when the page was fetched with a non-error status,
            False on any failure (the error is logged, never raised).
        """
        try:
            response = self.session.get(self.base_url, timeout=30)
            response.raise_for_status()
            logger.info("Successfully loaded initial page")
            return True
        except Exception as e:
            logger.error(f"Error loading initial page: {e}")
            return False

    def validate_tin(self, tin_id: str, first_name: str, middle_name: str,
                     last_name: str, birthdate: str, gender: str) -> Tuple[bool, str]:
        """Submit one TIN/name/birthdate/gender set for validation.

        Local checks (9-digit TIN, parseable birthdate) run before any
        network traffic so that bad input never costs a request.

        Returns:
            ``(is_valid, message)``. Every failure, including network
            errors, is reported through the message; nothing is raised.
        """
        try:
            # Strip common formatting so "123-456-789" is accepted.
            clean_tin = tin_id.replace("-", "").replace(" ", "").strip()
            if not clean_tin.isdigit() or len(clean_tin) != 9:
                return False, f"TIN must be exactly 9 digits (got: {len(clean_tin)} digits)"

            # The form expects the HTML date layout YYYY-MM-DD.
            formatted_birthdate = self.format_birthdate_html(birthdate)
            if not formatted_birthdate:
                return False, "Invalid birthdate format. Please use MM-DD-YYYY or DD-MM-YYYY"

            # Only now touch the network: load the form page first.
            if not self.get_initial_page():
                return False, "Failed to establish session with BIR website"

            form_data = {
                'tin': clean_tin,
                'firstName': first_name.strip().upper(),
                'middleName': middle_name.strip().upper() if middle_name else '',
                'lastName': last_name.strip().upper(),
                'gender': (gender or '').strip().upper(),  # 'MALE' or 'FEMALE'
                'birthdate': formatted_birthdate,
            }
            # Log field names only: the values are personal data.
            logger.info("Submitting validation request (fields: %s)", ", ".join(form_data))

            headers = {
                'Content-Type': 'application/x-www-form-urlencoded',
                'X-Requested-With': 'XMLHttpRequest',  # mark the POST as an AJAX request
                'Referer': self.base_url,
                'Origin': 'https://revie.bir.gov.ph',
            }
            response = self.session.post(
                self.submit_url,
                data=form_data,
                headers=headers,
                timeout=60,
            )
            logger.info("Response status: %s", response.status_code)
            # Body may echo registration details, so keep it at DEBUG.
            logger.debug("Response content: %s...", response.text[:500])

            if response.status_code == 200:
                return self.parse_json_response(response.text)
            return False, f"HTTP Error: {response.status_code} - {response.text[:200]}"
        except requests.RequestException as e:
            return False, f"Network error: {str(e)}"
        except Exception as e:
            return False, f"Validation error: {str(e)}"

    def parse_json_response(self, response_text: str) -> Tuple[bool, str]:
        """Interpret the reply body as JSON, falling back to HTML parsing.

        A ``status`` of ``RECORD_MATCHED`` (case-insensitive) is success;
        ``RECORD_NOT_MATCHED`` is a mismatch; anything else, including a
        missing or null status, is reported as "unable to process".
        """
        try:
            data = json.loads(response_text)
        except json.JSONDecodeError:
            # Not JSON: treat the body as an HTML page instead.
            return self.parse_html_response(response_text)
        except Exception as e:
            return False, f"Error parsing response: {str(e)}"

        if not isinstance(data, dict):
            # Valid JSON but not an object, so there is no status to read.
            return False, "Error parsing response: unexpected JSON payload"

        # `or ''` also covers an explicit null, which .get()'s default does not.
        status = str(data.get('status') or '').upper()
        if status == 'RECORD_MATCHED':
            rdo = data.get('RDO') or 'Unknown RDO'
            return True, f"β TIN is VALID and registered under {rdo}"
        if status == 'RECORD_NOT_MATCHED':
            return False, "β The information provided does not match the records in our database"
        return False, f"β Unable to process request. Status: {status}"

    def parse_html_response(self, html_content: str) -> Tuple[bool, str]:
        """Classify an HTML reply by searching its lower-cased text for known phrases."""
        try:
            soup = BeautifulSoup(html_content, 'html.parser')
            text_content = soup.get_text().lower()

            if any(phrase in text_content for phrase in self._SUCCESS_PHRASES):
                # Pull "rdo<digits>..." up to the next period, if present.
                rdo_match = re.search(r'(rdo\d+[^.]*)', text_content, re.IGNORECASE)
                rdo_info = rdo_match.group(1) if rdo_match else "Unknown RDO"
                return True, f"β TIN validation successful - {rdo_info}"

            if any(phrase in text_content for phrase in self._FAILURE_PHRASES):
                return False, "β Information does not match BIR database records"

            # No recognisable phrase: surface the start of the text instead.
            return False, f"Unclear response: {text_content[:200]}..."
        except Exception as e:
            return False, f"Error parsing HTML response: {str(e)}"

    def format_birthdate_html(self, birthdate: str) -> Optional[str]:
        """Convert a user-entered birthdate to ``YYYY-MM-DD``.

        Accepts MM-DD-YYYY, DD-MM-YYYY and YYYY-MM-DD with either "-" or "/"
        as separator; spaces are ignored. When day and month are both <= 12
        the month-first reading wins.

        Returns:
            The reformatted date, or ``None`` when the input is not a string
            or matches none of the accepted layouts.
        """
        if not isinstance(birthdate, str):
            logger.error("Error formatting birthdate: expected a string")
            return None

        clean_date = birthdate.replace("-", "/").replace(" ", "").strip()
        for fmt in self._BIRTHDATE_FORMATS:
            try:
                return datetime.strptime(clean_date, fmt).strftime("%Y-%m-%d")
            except ValueError:
                continue

        # The value itself is personal data, so it is not written to the log.
        logger.warning("Could not parse birthdate (unsupported format)")
        return None
def generate_name_combinations(first_name: str, middle_name: str, last_name: str) -> List[NameCombination]:
    """Build the ordered, de-duplicated list of name layouts to try.

    Order of the result:
      1. (only for a multi-word middle name) first middle word moved into
         the first-name field;
      2. no middle name at all;
      3. (only when a middle name was given) first+middle combined, the
         plain middle name, its initial, its initial with a dot, the middle
         name with a trailing dot, and the middle name with dots removed.

    Variants derived from the middle name are skipped when it is empty;
    otherwise they degenerate into a "." middle name or a first name with a
    trailing space, each of which would cost a pointless request.

    Returns:
        A list of ``NameCombination``; empty when first or last name is blank.
    """
    first_name = (first_name or "").strip()
    middle_name = (middle_name or "").strip()
    last_name = (last_name or "").strip()

    # (description, first, middle) in priority order; last name never varies.
    candidates = []

    middle_parts = middle_name.split()
    if len(middle_parts) >= 2:
        candidates.append((
            "Split middle: First + first middle word | Remaining middle | Last",
            f"{first_name} {middle_parts[0]}",
            " ".join(middle_parts[1:]),
        ))

    candidates.append(("No middle: First | '' | Last", first_name, ""))

    if middle_name:
        initial = middle_name[0]
        dotted = middle_name if middle_name.endswith(".") else f"{middle_name}."
        candidates.extend([
            ("Combined First + Middle (no middle field): 'First Middle' | '' | Last",
             f"{first_name} {middle_name}", ""),
            ("Standard: First | Middle | Last", first_name, middle_name),
            ("Middle Initial: First | M | Last", first_name, initial),
            ("Middle Initial with dot: First | M. | Last", first_name, f"{initial}."),
            ("Middle with dot: First | Middle. | Last", first_name, dotted),
            ("Middle without dot: First | Middle(no dot) | Last",
             first_name, middle_name.replace(".", "")),
        ])

    combinations = []
    seen = set()
    for description, first, middle in candidates:
        # A usable combination needs both a first and a last name.
        if not first_name or not last_name:
            continue
        key = (first, middle, last_name)
        if key in seen:
            continue
        seen.add(key)
        combinations.append(NameCombination(
            description=description,
            first_name=first,
            middle_name=middle,
            last_name=last_name,
        ))
    return combinations
def validate_single_combination(tin_id: str, birthdate: str, gender: str,
                                combination: NameCombination) -> NameCombination:
    """Run one validation attempt and record its outcome on ``combination``.

    A fresh ``TINValidatorWebScraper`` is used for the attempt, preceded by a
    fixed 3-second pause. The same ``combination`` object is updated in place
    (``status``, ``result``, ``error_message``) and returned; exceptions are
    captured as an "Error" status rather than propagated.
    """
    scraper = TINValidatorWebScraper()
    try:
        # Pause before every request so the server is not hit in bursts.
        time.sleep(3)
        logger.info(f"Testing combination: {combination.description}")

        matched, detail = scraper.validate_tin(
            tin_id,
            combination.first_name,
            combination.middle_name,
            combination.last_name,
            birthdate,
            gender,
        )

        if matched:
            combination.status = "Success"
            outcome_label = "SUCCESS"
        else:
            combination.status = "Failed"
            outcome_label = "FAILED"
        combination.result = detail
        # The message doubles as the error text only for failed attempts.
        combination.error_message = detail if not matched else ""
        logger.info(f"Result for {combination.description}: {outcome_label}")
    except Exception as exc:
        combination.status = "Error"
        combination.result = ""
        combination.error_message = str(exc)
        logger.error(f"Error testing {combination.description}: {exc}")
    return combination
def create_html_table(table_data: List[List[str]]) -> str:
    """Render result rows as a scrollable, colour-coded HTML table.

    Args:
        table_data: Rows of cell values. Only the first six cells of each
            row are shown; the fifth cell (index 4), when present, is the
            status that selects the row colour.

    Returns:
        An HTML fragment, or a short "no results" paragraph for empty input.

    Cell text comes from user input and from raw server responses, so it is
    HTML-escaped both in the cell body and in the ``title`` attribute.
    """
    from html import escape  # local import keeps the block self-contained

    if not table_data:
        return "<p>No results to display</p>"

    headers = ["Description", "First Name", "Middle Name", "Last Name", "Status", "Result"]
    # Row background per status; anything else stays white.
    status_colors = {
        "Success": "#d4edda",  # light green
        "Failed": "#f8d7da",   # light red
        "Error": "#fff3cd",    # light yellow
    }

    header_cells = "".join(
        f'<th style="border: 1px solid #ddd; padding: 12px; text-align: left; font-weight: bold; background: #f8f9fa;">{header}</th>'
        for header in headers
    )

    body_rows = []
    for row in table_data:
        status = row[4] if len(row) >= 5 else None
        bg_color = status_colors.get(status, "#ffffff")

        cells = []
        # Only the first 6 columns are displayed (the error column is omitted).
        for cell in row[:6]:
            full_text = str(cell) if cell else ""
            # Long values are truncated in the cell; the tooltip keeps them whole.
            shown = full_text if len(full_text) <= 80 else full_text[:80] + "..."
            cells.append(
                '<td style="border: 1px solid #ddd; padding: 8px; word-wrap: break-word; max-width: 200px;" '
                f'title="{escape(full_text, quote=True)}">{escape(shown)}</td>'
            )
        body_rows.append(
            f'<tr style="background-color: {bg_color};">' + "".join(cells) + '</tr>'
        )

    return "\n".join([
        '<div style="overflow-x: auto; max-height: 600px; border: 1px solid #ddd;">',
        '<table style="width: 100%; border-collapse: collapse; margin: 0;">',
        '<thead style="position: sticky; top: 0; background: #f8f9fa;">',
        '<tr>',
        header_cells,
        '</tr>',
        '</thead>',
        '<tbody>',
        "".join(body_rows),
        '</tbody>',
        '</table>',
        '</div>',
        '',
    ])
def validate_tin_sequential(first_name: str, middle_name: str, last_name: str,
                            tin_id: str, birthdate: str, gender: str) -> Tuple[List[List[str]], str, bool, str]:
    """Try name combinations one at a time until one validates.

    Input is checked locally first: required fields, 9-digit TIN, parseable
    birthdate. This means obviously bad input never triggers the slow,
    rate-limited request loop. Combinations are then tested in order and
    the loop stops at the first success.

    Returns:
        ``(table_data, summary_markdown, success, html_table)``. For input
        errors ``table_data`` is ``[]``, ``success`` is ``False`` and
        ``html_table`` is ``""``.
    """
    # Treat None (e.g. an unselected radio button) like an empty field.
    first_name = (first_name or "").strip()
    middle_name = (middle_name or "").strip()
    last_name = (last_name or "").strip()
    tin_id = (tin_id or "").strip()
    birthdate = (birthdate or "").strip()
    gender = (gender or "").strip()

    # Middle name is optional; everything else is required.
    if not all([first_name, last_name, tin_id, birthdate, gender]):
        return [], "β Please fill in all required fields", False, ""

    clean_tin = tin_id.replace("-", "").replace(" ", "").strip()
    if not clean_tin.isdigit() or len(clean_tin) != 9:
        return [], f"β TIN must be exactly 9 digits (current: {len(clean_tin)} digits)", False, ""

    # Reject an unparseable birthdate once, up front, instead of letting
    # every combination sleep and then fail on it.
    if not TINValidatorWebScraper().format_birthdate_html(birthdate):
        return [], "Invalid birthdate format. Please use MM-DD-YYYY or DD-MM-YYYY", False, ""

    combinations = generate_name_combinations(first_name, middle_name, last_name)
    total_combinations = len(combinations)
    if total_combinations == 0:
        return [], "β Could not generate valid name combinations", False, ""
    logger.info(f"Generated {total_combinations} name combinations")

    completed = 0
    successful_combination = None
    for position, combo in enumerate(combinations, start=1):
        logger.info(f"Testing combination {position}/{total_combinations}: {combo.description}")
        outcome = validate_single_combination(tin_id, birthdate, gender, combo)
        # Copy the outcome back so the listed combination always reflects it.
        combo.status = outcome.status
        combo.result = outcome.result
        combo.error_message = outcome.error_message
        completed += 1
        if combo.status == "Success":
            successful_combination = combo
            logger.info(f"β Found successful combination: {combo.description}")
            break  # stop on first success

    # Only combinations that were actually tested appear in the table.
    table_data = [
        [
            combo.description,
            combo.first_name,
            combo.middle_name,
            combo.last_name,
            combo.status,
            combo.result,
            combo.error_message,
        ]
        for combo in combinations
        if combo.status != "Pending"
    ]
    html_table = create_html_table(table_data)

    successful_count = sum(1 for combo in combinations if combo.status == "Success")
    failed_count = sum(1 for combo in combinations if combo.status == "Failed")
    error_count = sum(1 for combo in combinations if combo.status == "Error")

    if successful_combination:
        # Join only the non-empty parts so a blank middle name leaves no gap.
        validated_name = " ".join(
            part for part in (
                successful_combination.first_name,
                successful_combination.middle_name,
                successful_combination.last_name,
            ) if part
        )
        summary = f"""
π **TIN VALIDATION SUCCESSFUL!**
β **Matching Combination:** {successful_combination.description}
π **Details:** {successful_combination.result}
π **Summary:**
- β Successful: {successful_count}
- β Failed: {failed_count}
- β οΈ Errors: {error_count}
- π Total Tested: {completed}/{total_combinations}
π **Validated Information:**
- **TIN:** {tin_id}
- **Name:** {validated_name}
- **Gender:** {gender}
- **Birthdate:** {birthdate}
"""
        return table_data, summary, True, html_table

    summary = f"""
β **No successful TIN validation found**
π **Summary:**
- β Successful: {successful_count}
- β Failed: {failed_count}
- β οΈ Errors: {error_count}
- π Total Tested: {completed}/{total_combinations}
π‘ **Possible reasons:**
- Personal information doesn't match BIR database exactly
- TIN may not be registered or active
- Name formatting differences
- Website temporarily unavailable
- Data entry errors
π§ **Suggestions:**
- Double-check TIN number (must be 9 digits)
- Verify name spelling and format
- Check birthdate format (MM-DD-YYYY)
- Try again later if server issues
"""
    return table_data, summary, False, html_table
| # Create Gradio interface | |
def create_interface():
    """Build the Gradio Blocks UI and wire the validate button to the checker.

    Layout: a left column with the input fields and the button, a right
    column with a status box, a Markdown summary and an HTML results table.

    Returns:
        The ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks(title="TIN Validator - Updated", theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # π Philippine TIN Validator (AAI Data Operation Team)
        ### Validate Tax Identification Numbers using the official BIR system
        This tool has been updated to work with the current BIR validation system.
        It tests multiple name formatting combinations to find a match.
        β οΈ **Important**: This tool respects the BIR server with appropriate delays between requests.
        """)

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### π Required Information")
                first_name = gr.Textbox(
                    label="First Name *",
                    placeholder="Juan",
                    info="Enter your first name exactly as registered",
                )
                # No "*" here: the field is optional, as its help text says
                # and as the validation (which never requires it) confirms.
                middle_name = gr.Textbox(
                    label="Middle Name",
                    placeholder="Santos",
                    info="Leave blank if you don't have a middle name",
                )
                last_name = gr.Textbox(
                    label="Last Name *",
                    placeholder="Dela Cruz",
                    info="Enter your last name exactly as registered",
                )
                tin_id = gr.Textbox(
                    label="TIN Number *",
                    placeholder="123456789",
                    info="9-digit number only (no hyphens)",
                )
                birthdate = gr.Textbox(
                    label="Birth Date *",
                    placeholder="01-15-1990 or 15-01-1990",
                    info="MM-DD-YYYY or DD-MM-YYYY format",
                )
                gender = gr.Radio(
                    choices=["Male", "Female"],
                    label="Gender *",
                    value="Male",
                    info="Select your gender as registered",
                )
                validate_btn = gr.Button(
                    "π Validate TIN",
                    variant="primary",
                    size="lg",
                )
                gr.Markdown("""
                ### β±οΈ Process Info:
                - **Processing time**: 2-5 minutes
                - **Rate limiting**: 3 seconds between attempts
                - **Smart stopping**: Stops when match is found
                - **Multiple formats**: Tests various name combinations
                """)

            with gr.Column(scale=2):
                gr.Markdown("### π Validation Results")
                progress_output = gr.Textbox(
                    label="Status",
                    value="Ready to validate - click the button to start",
                    interactive=False,
                    lines=2,
                )
                summary_output = gr.Markdown(value="Results will appear here after validation...")
                results_table = gr.HTML(
                    value="<p>Detailed results table will appear here...</p>",
                    label="Detailed Results",
                )

        def validate_and_update(first, middle, last, tin, birth, gender_val):
            """Generator handler: show a progress state, then the final outcome.

            Each yielded tuple is (status text, summary markdown, table HTML),
            matching the three output components wired up below.
            """
            yield (
                "π Starting TIN validation...\nConnecting to BIR system...",
                "### π Processing...\nPlease wait while we validate your information.",
                "<p style='text-align: center; color: #666;'>Validation in progress...</p>",
            )

            try:
                table_data, summary, success, html_table = validate_tin_sequential(
                    first, middle, last, tin, birth, gender_val
                )
            except Exception as e:
                yield (
                    f"β An error occurred during validation:\n{str(e)}",
                    f"### β Error\n```\n{str(e)}\n```\n\nPlease check your input and try again.",
                    "<p style='color: red;'>An error occurred during validation. Please try again.</p>",
                )
                return

            if success:
                status_text = "β Validation completed successfully!"
            else:
                status_text = "β Validation completed - no matches found"
                # Input errors return no table; show a placeholder instead.
                html_table = html_table or "<p>No detailed results available</p>"
            yield status_text, summary, html_table

        validate_btn.click(
            fn=validate_and_update,
            inputs=[first_name, middle_name, last_name, tin_id, birthdate, gender],
            outputs=[progress_output, summary_output, results_table],
        )

    return demo
| # Launch the app | |
if __name__ == "__main__":
    # Build the UI, then serve it on all interfaces at port 7860 with a
    # Gradio share link requested.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    demo = create_interface()
    demo.launch(**launch_options)