"""Gradio app that validates a Philippine TIN against the BIR validation site.

The app tries several formatting variants of a person's name (with/without
middle name, initials, dots, ...) one request at a time and stops at the first
variant the BIR endpoint reports as matching.
"""

import concurrent.futures
import html
import json
import logging
import re
import time
from dataclasses import dataclass
from datetime import datetime
from typing import Dict, List, Optional, Tuple

import gradio as gr
import requests
from bs4 import BeautifulSoup

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Pause before every request so the BIR server is not hammered.
REQUEST_DELAY_SECONDS = 3

# Number of digits this tool accepts for a TIN (after removing "-" and " ").
TIN_LENGTH = 9

# Input formats accepted for the birthdate after "-" has been normalised to
# "/". Order matters: an ambiguous value such as 01/02/1990 is read as
# month/day first; day/month is only used when month/day is impossible.
_BIRTHDATE_FORMATS = ("%m/%d/%Y", "%d/%m/%Y", "%Y/%m/%d")

# Prefixes of the messages validate_tin() returns when the request itself
# failed (as opposed to the server answering "no match"). Keep in sync with
# the messages produced in TINValidatorWebScraper.
_REQUEST_ERROR_PREFIXES = (
    "Failed to establish session",
    "HTTP Error:",
    "Network error:",
    "Validation error:",
    "Error parsing",
)

HEADER_MARKDOWN = """
# 🆔 Philippine TIN Validator (AAI Data Operation Team)
### Validate Tax Identification Numbers using the official BIR system

This tool has been updated to work with the current BIR validation system.
It tests multiple name formatting combinations to find a match.

⚠️ **Important**: This tool respects the BIR server with appropriate delays between requests.
"""

PROCESS_INFO_MARKDOWN = """
### ⏱️ Process Info:
- **Processing time**: 2-5 minutes
- **Rate limiting**: 3 seconds between attempts
- **Smart stopping**: Stops when match is found
- **Multiple formats**: Tests various name combinations
"""


def _clean_tin(tin_id: str) -> str:
    """Return *tin_id* with hyphens, spaces and surrounding whitespace removed."""
    return tin_id.replace("-", "").replace(" ", "").strip()


def _format_birthdate(birthdate: str) -> Optional[str]:
    """Convert a user-entered birthdate to ``YYYY-MM-DD``, or None if unparseable."""
    if not isinstance(birthdate, str):
        return None

    # Accept both "-" and "/" separators by normalising to "/".
    clean_date = birthdate.replace("-", "/").replace(" ", "").strip()
    for fmt in _BIRTHDATE_FORMATS:
        try:
            return datetime.strptime(clean_date, fmt).strftime("%Y-%m-%d")
        except ValueError:
            continue
    return None


def _message_html(text: str) -> str:
    """Wrap a short status message in a centred HTML block for gr.HTML.

    NOTE(review): the original markup of these placeholders was lost (only the
    text survived); this styling is a reconstruction.
    """
    return (
        "<div style='text-align: center; padding: 20px; color: #666;'>"
        f"<p>{html.escape(text)}</p>"
        "</div>"
    )


@dataclass
class NameCombination:
    """One name variant to test, plus the outcome once it has been tested."""

    description: str
    first_name: str
    middle_name: str
    last_name: str
    status: str = "Pending"  # "Pending", "Success", "Failed" or "Error"
    result: str = ""
    error_message: str = ""


class TINValidatorWebScraper:
    """Thin client for the BIR TIN validation form."""

    def __init__(self):
        self.session = requests.Session()
        # Accept-Encoding is intentionally NOT overridden: requests only
        # advertises encodings it can actually decode. Hard-coding "br" makes
        # the server free to send brotli, which cannot be decoded unless a
        # brotli package happens to be installed.
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.9',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
        })

        # Updated URLs based on the HTML you provided
        self.base_url = "https://revie.bir.gov.ph/tin-validation"
        self.submit_url = "https://revie.bir.gov.ph/submitTinValidation"

    def get_initial_page(self) -> bool:
        """Load the initial TIN validation page to establish session"""
        try:
            response = self.session.get(self.base_url, timeout=30)
            response.raise_for_status()
            logger.info("Successfully loaded initial page")
            return True
        except Exception as e:
            logger.error(f"Error loading initial page: {e}")
            return False

    def validate_tin(self, tin_id: str, first_name: str, middle_name: str,
                     last_name: str, birthdate: str, gender: str) -> Tuple[bool, str]:
        """Validate TIN using the updated BIR system.

        Args:
            tin_id: TIN as typed by the user; hyphens and spaces are ignored.
            first_name, middle_name, last_name: name parts; sent upper-cased.
            birthdate: date in one of the formats accepted by
                format_birthdate_html().
            gender: "Male" or "Female" (sent upper-cased).

        Returns:
            ``(is_valid, message)``. ``is_valid`` is True only when the server
            reports a match; ``message`` is suitable for display. This method
            does not raise: every failure is reported through the message.
        """
        try:
            # Cheap local checks first, so bad input never costs a request.
            clean_tin = _clean_tin(tin_id)
            if not clean_tin.isdigit() or len(clean_tin) != TIN_LENGTH:
                return False, f"TIN must be exactly 9 digits (got: {len(clean_tin)} digits)"

            formatted_birthdate = self.format_birthdate_html(birthdate)
            if not formatted_birthdate:
                return False, "Invalid birthdate format. Please use MM-DD-YYYY or DD-MM-YYYY"

            # Load initial page to establish session (cookies) before posting
            if not self.get_initial_page():
                return False, "Failed to establish session with BIR website"

            # Prepare form data exactly as shown in the HTML
            form_data = {
                'tin': clean_tin,
                'firstName': first_name.strip().upper(),
                'middleName': middle_name.strip().upper() if middle_name else '',
                'lastName': last_name.strip().upper(),
                'gender': gender.upper(),  # 'MALE' or 'FEMALE'
                'birthdate': formatted_birthdate,
            }

            # Do not log the form itself: it is personal data (TIN, full
            # name, birthdate).
            logger.info("Submitting validation for TIN ending in %s", clean_tin[-3:])

            # Set headers for form submission
            headers = {
                'Content-Type': 'application/x-www-form-urlencoded',
                'X-Requested-With': 'XMLHttpRequest',  # Important for AJAX requests
                'Referer': self.base_url,
                'Origin': 'https://revie.bir.gov.ph',
            }

            # Submit form to the correct endpoint
            response = self.session.post(
                self.submit_url,
                data=form_data,
                headers=headers,
                timeout=60,
            )

            logger.info(f"Response status: {response.status_code}")
            logger.debug("Response content: %s...", response.text[:500])

            if response.status_code == 200:
                return self.parse_json_response(response.text)
            return False, f"HTTP Error: {response.status_code} - {response.text[:200]}"

        except requests.RequestException as e:
            return False, f"Network error: {str(e)}"
        except Exception as e:
            return False, f"Validation error: {str(e)}"

    def parse_json_response(self, response_text: str) -> Tuple[bool, str]:
        """Parse the JSON response from BIR system.

        Falls back to parse_html_response() when the body is not JSON.
        """
        try:
            data = json.loads(response_text)
        except json.JSONDecodeError:
            # If not JSON, treat as HTML response
            return self.parse_html_response(response_text)
        except Exception as e:
            return False, f"Error parsing response: {str(e)}"

        if not isinstance(data, dict):
            return False, "Error parsing response: unexpected JSON structure"

        # `status` may be missing or null; normalise to a string either way.
        status = str(data.get('status') or '').upper()

        if status == 'RECORD_MATCHED':
            rdo = data.get('RDO', 'Unknown RDO')
            return True, f"✅ TIN is VALID and registered under {rdo}"
        if status == 'RECORD_NOT_MATCHED':
            return False, "❌ The information provided does not match the records in our database"
        return False, f"❌ Unable to process request. Status: {status}"

    def parse_html_response(self, html_content: str) -> Tuple[bool, str]:
        """Fallback HTML parsing if JSON parsing fails"""
        success_phrases = (
            'valid and registered',
            'record matched',
            'successfully verified',
            'tin provided is valid',
            'validation successful',
        )
        failure_phrases = (
            'does not match',
            'record not matched',
            'cannot process',
            'not found',
            'invalid information',
            'regret to inform',
        )
        try:
            soup = BeautifulSoup(html_content, 'html.parser')
            text_content = soup.get_text().lower()

            # Success indicators are checked before failure indicators.
            if any(phrase in text_content for phrase in success_phrases):
                # Try to extract RDO information
                rdo_match = re.search(r'(rdo\d+[^.]*)', text_content, re.IGNORECASE)
                rdo_info = rdo_match.group(1) if rdo_match else "Unknown RDO"
                return True, f"✅ TIN validation successful - {rdo_info}"

            if any(phrase in text_content for phrase in failure_phrases):
                return False, "❌ Information does not match BIR database records"

            # If no clear indicators, return the response
            return False, f"Unclear response: {text_content[:200]}..."

        except Exception as e:
            return False, f"Error parsing HTML response: {str(e)}"

    def format_birthdate_html(self, birthdate: str) -> Optional[str]:
        """Format birthdate to HTML date format (YYYY-MM-DD).

        Accepts MM-DD-YYYY, DD-MM-YYYY or YYYY-MM-DD with "-" or "/" as the
        separator. Ambiguous dates are interpreted as month-first.

        Returns:
            The reformatted date, or None when the input cannot be parsed.
        """
        formatted = _format_birthdate(birthdate)
        if formatted is None:
            logger.warning(f"Could not parse birthdate: {birthdate}")
        return formatted


def generate_name_combinations(first_name: str, middle_name: str,
                               last_name: str) -> List[NameCombination]:
    """Generate prioritized name combinations with and without middle name.

    Combinations that would result in the same request (the validator strips
    and upper-cases names before sending) are emitted only once. Variants that
    need a middle name are skipped when none is given.

    Returns:
        Combinations in the order they should be tried; empty when the first
        or last name is blank.
    """
    # Clean inputs
    first_name = (first_name or "").strip()
    middle_name = (middle_name or "").strip()
    last_name = (last_name or "").strip()

    if not first_name or not last_name:
        return []

    # (description, first, middle, last) in priority order.
    candidates: List[Tuple[str, str, str, str]] = []

    # A multi-word middle name may really be "second given name + middle
    # name": move its first word to the first name.
    middle_parts = middle_name.split()
    if len(middle_parts) >= 2:
        candidates.append((
            "Split middle: First + first middle word | Remaining middle | Last",
            f"{first_name} {middle_parts[0]}",
            " ".join(middle_parts[1:]),
            last_name,
        ))

    # 1. Try no-middle combinations first
    candidates.append(("No middle: First | '' | Last", first_name, "", last_name))

    if middle_name:
        candidates.append((
            "Combined First + Middle (no middle field): 'First Middle' | '' | Last",
            f"{first_name} {middle_name}",
            "",
            last_name,
        ))

        # 2. Try middle name variations
        dotted_middle = middle_name if middle_name.endswith(".") else f"{middle_name}."
        candidates.extend([
            ("Standard: First | Middle | Last",
             first_name, middle_name, last_name),
            ("Middle Initial: First | M | Last",
             first_name, middle_name[0], last_name),
            ("Middle Initial with dot: First | M. | Last",
             first_name, f"{middle_name[0]}.", last_name),
            ("Middle with dot: First | Middle. | Last",
             first_name, dotted_middle, last_name),
            ("Middle without dot: First | Middle(no dot) | Last",
             first_name, middle_name.replace(".", ""), last_name),
        ])

    combinations: List[NameCombination] = []
    seen = set()
    for description, first, middle, last in candidates:
        # Same normalisation the validator applies before sending.
        key = (first.strip().upper(), middle.strip().upper(), last.strip().upper())
        if key in seen:
            continue
        seen.add(key)
        combinations.append(NameCombination(
            description=description,
            first_name=first,
            middle_name=middle,
            last_name=last,
        ))

    return combinations


def validate_single_combination(tin_id: str, birthdate: str, gender: str,
                                combination: NameCombination) -> NameCombination:
    """Validate a single name combination.

    Sleeps REQUEST_DELAY_SECONDS, sends one validation request and records the
    outcome on *combination* (which is also returned):

    - "Success": the server reported a match.
    - "Failed":  the server answered but did not report a match.
    - "Error":   the request could not be completed or understood.
    """
    validator = TINValidatorWebScraper()

    try:
        # Add delay to be respectful to the server
        time.sleep(REQUEST_DELAY_SECONDS)

        logger.info(f"Testing combination: {combination.description}")

        is_valid, message = validator.validate_tin(
            tin_id,
            combination.first_name,
            combination.middle_name,
            combination.last_name,
            birthdate,
            gender,
        )

        if is_valid:
            combination.status = "Success"
        elif message.startswith(_REQUEST_ERROR_PREFIXES):
            # validate_tin() reports transport problems through its message
            # rather than by raising; don't present them as a "no match".
            combination.status = "Error"
        else:
            combination.status = "Failed"
        combination.result = message
        combination.error_message = "" if is_valid else message

        logger.info(f"Result for {combination.description}: {combination.status.upper()}")

    except Exception as e:
        combination.status = "Error"
        combination.result = ""
        combination.error_message = str(e)
        logger.error(f"Error testing {combination.description}: {e}")
    finally:
        # Each attempt owns its session; release its pooled connections.
        validator.session.close()

    return combination


def create_html_table(table_data: List[List[str]]) -> str:
    """Create HTML table from table data.

    Args:
        table_data: rows of ``[description, first, middle, last, status,
            result, error_message]``. Only the first six columns are shown.

    Returns:
        An HTML fragment. Cell text is HTML-escaped because it contains user
        input and text taken from the server response.

    NOTE(review): the tags and inline styles are a reconstruction; in the
    recovered source only the colours, headers and cell logic survived.
    """
    if not table_data:
        return _message_html("No results to display")

    headers = ["Description", "First Name", "Middle Name", "Last Name", "Status", "Result"]
    # Background colour per status; anything else stays white.
    status_colors = {
        "Success": "#d4edda",  # Light green for success
        "Failed": "#f8d7da",   # Light red for failed
        "Error": "#fff3cd",    # Light yellow for error
    }
    cell_style = "border: 1px solid #dee2e6; padding: 8px; text-align: left;"

    parts = [
        '<div style="overflow-x: auto;">',
        '<table style="width: 100%; border-collapse: collapse; color: #212529;">',
        '<thead>',
        '<tr style="background-color: #f8f9fa;">',
    ]

    # Add headers
    parts.extend(f'<th style="{cell_style}">{header}</th>' for header in headers)
    parts.extend(['</tr>', '</thead>', '<tbody>'])

    # Add data rows
    for row in table_data:
        bg_color = "#ffffff"
        if len(row) >= 5:  # Check if status exists
            bg_color = status_colors.get(row[4], bg_color)

        parts.append(f'<tr style="background-color: {bg_color};">')

        # Only show first 6 columns (exclude error column for cleaner display)
        for cell in row[:6]:
            display_text = str(cell) if cell else ""
            if len(display_text) > 80:
                display_text = display_text[:80] + "..."
            parts.append(f'<td style="{cell_style}">{html.escape(display_text)}</td>')

        parts.append('</tr>')

    parts.extend(['</tbody>', '</table>', '</div>'])
    return "\n".join(parts)


def _build_summary(successful_combination: Optional[NameCombination],
                   combinations: List[NameCombination], completed: int,
                   tin_id: str, gender: str, birthdate: str) -> str:
    """Build the Markdown summary shown above the results table."""
    successful_count = sum(1 for combo in combinations if combo.status == "Success")
    failed_count = sum(1 for combo in combinations if combo.status == "Failed")
    error_count = sum(1 for combo in combinations if combo.status == "Error")

    counts = [
        "📊 **Summary:**",
        f"- ✅ Successful: {successful_count}",
        f"- ❌ Failed: {failed_count}",
        f"- ⚠️ Errors: {error_count}",
        f"- 📋 Total Tested: {completed}/{len(combinations)}",
    ]

    # Lines are kept flush-left so Markdown does not render them as code.
    if successful_combination:
        lines = [
            "",
            "🎉 **TIN VALIDATION SUCCESSFUL!**",
            "",
            f"✅ **Matching Combination:** {successful_combination.description}",
            "",
            f"📝 **Details:** {successful_combination.result}",
            "",
            *counts,
            "",
            "🆔 **Validated Information:**",
            f"- **TIN:** {tin_id}",
            f"- **Name:** {successful_combination.first_name} "
            f"{successful_combination.middle_name} {successful_combination.last_name}",
            f"- **Gender:** {gender}",
            f"- **Birthdate:** {birthdate}",
            "",
        ]
    else:
        lines = [
            "",
            "❌ **No successful TIN validation found**",
            "",
            *counts,
            "",
            "💡 **Possible reasons:**",
            "- Personal information doesn't match BIR database exactly",
            "- TIN may not be registered or active",
            "- Name formatting differences",
            "- Website temporarily unavailable",
            "- Data entry errors",
            "",
            "🔧 **Suggestions:**",
            "- Double-check TIN number (must be 9 digits)",
            "- Verify name spelling and format",
            "- Check birthdate format (MM-DD-YYYY)",
            "- Try again later if server issues",
            "",
        ]
    return "\n".join(lines)


def validate_tin_sequential(first_name: str, middle_name: str, last_name: str,
                            tin_id: str, birthdate: str,
                            gender: str) -> Tuple[List[List[str]], str, bool, str]:
    """Sequential validation with updated BIR system.

    Validates the input locally, then tries each name combination in turn,
    one request at a time, stopping at the first success.

    Returns:
        ``(table_data, summary, success, html_table)`` where ``table_data``
        has one row per tested combination, ``summary`` is Markdown,
        ``success`` tells whether a match was found and ``html_table`` is
        ``table_data`` rendered as HTML. On invalid input ``table_data`` is
        empty, ``summary`` holds the error and ``html_table`` is "".
    """
    # Input validation
    required = [first_name, last_name, tin_id, birthdate, gender]
    if not all(isinstance(value, str) and value.strip() for value in required):
        return [], "❌ Please fill in all required fields", False, ""

    # Validate TIN format
    clean_tin = _clean_tin(tin_id)
    if not clean_tin.isdigit() or len(clean_tin) != TIN_LENGTH:
        return [], f"❌ TIN must be exactly 9 digits (current: {len(clean_tin)} digits)", False, ""

    # Validate the birthdate once here; otherwise every combination would
    # wait and hit the network only to be rejected for the same reason.
    if _format_birthdate(birthdate) is None:
        return [], "❌ Invalid birthdate format. Please use MM-DD-YYYY or DD-MM-YYYY", False, ""

    # Generate combinations
    combinations = generate_name_combinations(first_name, middle_name or "", last_name)
    total_combinations = len(combinations)

    if total_combinations == 0:
        return [], "❌ Could not generate valid name combinations", False, ""

    logger.info(f"Generated {total_combinations} name combinations")

    # Sequential validation
    completed = 0
    successful_combination = None

    for position, combo in enumerate(combinations, start=1):
        logger.info(f"Testing combination {position}/{total_combinations}: {combo.description}")

        # validate_single_combination() records the outcome on `combo` itself.
        validate_single_combination(tin_id, birthdate, gender, combo)
        completed += 1

        if combo.status == "Success":
            successful_combination = combo
            logger.info(f"✅ Found successful combination: {combo.description}")
            break  # Stop on first success

    # Create results table data (only tested combinations)
    table_data = [
        [
            combo.description,
            combo.first_name,
            combo.middle_name,
            combo.last_name,
            combo.status,
            combo.result,
            combo.error_message,
        ]
        for combo in combinations
        if combo.status != "Pending"
    ]

    html_table = create_html_table(table_data)
    summary = _build_summary(
        successful_combination, combinations, completed, tin_id, gender, birthdate
    )

    return table_data, summary, successful_combination is not None, html_table


# Create Gradio interface
def create_interface():
    """Build and return the Gradio Blocks UI (not yet launched)."""
    with gr.Blocks(title="TIN Validator - Updated", theme=gr.themes.Soft()) as demo:
        gr.Markdown(HEADER_MARKDOWN)

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 📝 Required Information")

                first_name = gr.Textbox(
                    label="First Name *",
                    placeholder="Juan",
                    info="Enter your first name exactly as registered"
                )
                # Not marked with "*": the middle name is optional.
                middle_name = gr.Textbox(
                    label="Middle Name",
                    placeholder="Santos",
                    info="Leave blank if you don't have a middle name"
                )
                last_name = gr.Textbox(
                    label="Last Name *",
                    placeholder="Dela Cruz",
                    info="Enter your last name exactly as registered"
                )
                tin_id = gr.Textbox(
                    label="TIN Number *",
                    placeholder="123456789",
                    info="9-digit number only (no hyphens)"
                )
                birthdate = gr.Textbox(
                    label="Birth Date *",
                    placeholder="01-15-1990 or 15-01-1990",
                    info="MM-DD-YYYY or DD-MM-YYYY format"
                )
                gender = gr.Radio(
                    choices=["Male", "Female"],
                    label="Gender *",
                    value="Male",
                    info="Select your gender as registered"
                )

                validate_btn = gr.Button(
                    "🔍 Validate TIN",
                    variant="primary",
                    size="lg"
                )

                gr.Markdown(PROCESS_INFO_MARKDOWN)

            with gr.Column(scale=2):
                gr.Markdown("### 📊 Validation Results")

                progress_output = gr.Textbox(
                    label="Status",
                    value="Ready to validate - click the button to start",
                    interactive=False,
                    lines=2
                )

                summary_output = gr.Markdown(value="Results will appear here after validation...")

                results_table = gr.HTML(
                    value=_message_html("Detailed results table will appear here..."),
                    label="Detailed Results"
                )

        # Event handlers
        def validate_and_update(first, middle, last, tin, birth, gender_val):
            """Generator handler: yields a progress update, then the result.

            Each yielded tuple is (status text, summary markdown, table html).
            """
            try:
                # Initial progress update
                yield (
                    "🔄 Starting TIN validation...\nConnecting to BIR system...",
                    "### 🔄 Processing...\nPlease wait while we validate your information.",
                    _message_html("Validation in progress...")
                )

                # Perform validation
                table_data, summary, success, html_table = validate_tin_sequential(
                    first, middle, last, tin, birth, gender_val
                )

                if success:
                    yield (
                        "✅ Validation completed successfully!",
                        summary,
                        html_table
                    )
                else:
                    yield (
                        "❌ Validation completed - no matches found",
                        summary,
                        html_table if html_table else _message_html("No detailed results available")
                    )

            except Exception as e:
                error_msg = f"❌ An error occurred during validation:\n{str(e)}"
                yield (
                    error_msg,
                    f"### ❌ Error\n```\n{str(e)}\n```\n\nPlease check your input and try again.",
                    _message_html("An error occurred during validation. Please try again.")
                )

        validate_btn.click(
            fn=validate_and_update,
            inputs=[first_name, middle_name, last_name, tin_id, birthdate, gender],
            outputs=[progress_output, summary_output, results_table]
        )

        # # Example usage
        # gr.Markdown("### 📋 Example Input")
        # gr.Examples(
        #     examples=[
        #         ["Juan", "Santos", "Dela Cruz", "123456789", "01-15-1990", "Male"],
        #         ["Maria", "Carmen", "Rodriguez", "987654321", "12-25-1985", "Female"],
        #         ["Jose", "", "Rizal", "111222333", "06-19-1861", "Male"],
        #     ],
        #     inputs=[first_name, middle_name, last_name, tin_id, birthdate, gender],
        # )

        # gr.Markdown("""
        # ### 🔧 How it Works:
        # 1. **Connects to BIR System** - Uses the official validation endpoint
        # 2. **Tests Name Variations** - Tries different formatting combinations
        # 3. **JSON Response Parsing** - Analyzes the server's response properly
        # 4. **Sequential Processing** - One request at a time with delays

        # ### 📋 Input Tips:
        # - **TIN**: Enter exactly 9 digits (remove hyphens)
        # - **Names**: Use the exact spelling from your registration
        # - **Date**: MM-DD-YYYY format works best
        # - **Patience**: Process takes 2-5 minutes due to rate limiting

        # ### ⚠️ Troubleshooting:
        # - If validation fails, check your TIN and personal information
        # - Ensure birthdate is in correct format
        # - Verify name spelling matches your BIR registration
        # - Try again if server is temporarily unavailable
        # """)

    return demo


# Launch the app
if __name__ == "__main__":
    demo = create_interface()
    # NOTE(review): share=True publishes a public Gradio link and 0.0.0.0
    # listens on every interface, while the form collects personal data
    # (TIN, name, birthdate). Confirm this exposure is intended.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)