import concurrent.futures
import json
import logging
import re
import time
from dataclasses import dataclass
from datetime import datetime
from typing import Dict, List, Optional, Tuple

import gradio as gr
import requests
from bs4 import BeautifulSoup

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Lower-case phrases searched for in the text of an HTML reply.
_HTML_SUCCESS_PHRASES = (
    'valid and registered',
    'record matched',
    'successfully verified',
    'tin provided is valid',
    'validation successful',
)
_HTML_FAILURE_PHRASES = (
    'does not match',
    'record not matched',
    'cannot process',
    'not found',
    'invalid information',
    'regret to inform',
)
# Captures "rdo<digits>" plus everything up to the next full stop.
_RDO_PATTERN = re.compile(r'(rdo\d+[^.]*)', re.IGNORECASE)

# Input layouts accepted for birthdates once "-" has been normalised to "/".
# Order matters: an ambiguous value such as 01/02/1990 is read month-first.
_BIRTHDATE_FORMATS = (
    "%m/%d/%Y",  # MM/DD/YYYY
    "%d/%m/%Y",  # DD/MM/YYYY
    "%Y/%m/%d",  # YYYY/MM/DD
)


@dataclass
class NameCombination:
    """One first/middle/last name variant to submit, plus its outcome."""

    description: str            # human-readable label of the variant
    first_name: str
    middle_name: str
    last_name: str
    status: str = "Pending"     # set to "Success" / "Failed" / "Error" once tested
    result: str = ""            # message returned by the validator
    error_message: str = ""     # failure or exception text, empty on success


class TINValidatorWebScraper:
    """HTTP client that submits the TIN validation form and interprets the reply."""

    def __init__(self):
        """Create a session with browser-like headers and set the endpoints."""
        self.session = requests.Session()
        # NOTE: Accept-Encoding is deliberately NOT overridden. requests
        # advertises only the encodings it can actually decode; forcing "br"
        # without a Brotli decoder installed yields an unreadable body.
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.9',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
        })
        # Updated URLs based on the HTML you provided
        self.base_url = "https://revie.bir.gov.ph/tin-validation"
        self.submit_url = "https://revie.bir.gov.ph/submitTinValidation"

    def get_initial_page(self) -> bool:
        """Load the initial TIN validation page to establish session.

        Returns:
            True when the page was fetched with a non-error status, False on
            any request failure (the failure is logged, not raised).
        """
        try:
            response = self.session.get(self.base_url, timeout=30)
            response.raise_for_status()
        except requests.RequestException as e:
            logger.error("Error loading initial page: %s", e)
            return False
        logger.info("Successfully loaded initial page")
        return True

    def validate_tin(self, tin_id: str, first_name: str, middle_name: str,
                     last_name: str, birthdate: str, gender: str) -> Tuple[bool, str]:
        """Validate TIN using the updated BIR system.

        Inputs are checked locally before any request is sent, so malformed
        input never costs a network round-trip.

        Args:
            tin_id: TIN as typed; hyphens and spaces are removed.
            first_name: Sent stripped and upper-cased.
            middle_name: Sent stripped and upper-cased; falsy values become ''.
            last_name: Sent stripped and upper-cased.
            birthdate: Any layout accepted by ``format_birthdate_html``.
            gender: Sent upper-cased ('MALE' or 'FEMALE').

        Returns:
            ``(is_valid, message)``. Never raises: network and unexpected
            errors are reported through ``message`` with ``is_valid`` False.
        """
        try:
            # Clean TIN - remove any formatting
            clean_tin = tin_id.replace("-", "").replace(" ", "").strip()

            # Ensure TIN is 9 ASCII digits (str.isdigit alone also accepts
            # characters such as superscript digits).
            is_numeric = clean_tin.isascii() and clean_tin.isdigit()
            if not is_numeric or len(clean_tin) != 9:
                return False, f"TIN must be exactly 9 digits (got: {len(clean_tin)} digits)"

            # Format birthdate to YYYY-MM-DD (HTML date format)
            formatted_birthdate = self.format_birthdate_html(birthdate)
            if not formatted_birthdate:
                return False, "Invalid birthdate format. Please use MM-DD-YYYY or DD-MM-YYYY"

            # Load initial page to establish session
            if not self.get_initial_page():
                return False, "Failed to establish session with BIR website"

            # Prepare form data exactly as shown in the HTML
            form_data = {
                'tin': clean_tin,
                'firstName': first_name.strip().upper(),
                'middleName': middle_name.strip().upper() if middle_name else '',
                'lastName': last_name.strip().upper(),
                'gender': gender.upper(),  # 'MALE' or 'FEMALE'
                'birthdate': formatted_birthdate,
            }
            # Personal data is kept out of the logs; only a TIN suffix is shown.
            logger.info("Submitting validation for TIN ending in %s", clean_tin[-3:])

            # Set headers for form submission
            headers = {
                'Content-Type': 'application/x-www-form-urlencoded',
                'X-Requested-With': 'XMLHttpRequest',  # Important for AJAX requests
                'Referer': self.base_url,
                'Origin': 'https://revie.bir.gov.ph',
            }

            # Submit form to the correct endpoint
            response = self.session.post(
                self.submit_url,
                data=form_data,
                headers=headers,
                timeout=60,
            )
            logger.info("Response status: %s", response.status_code)
            logger.debug("Response content: %s...", response.text[:500])

            if response.status_code == 200:
                return self.parse_json_response(response.text)
            return False, f"HTTP Error: {response.status_code} - {response.text[:200]}"
        except requests.RequestException as e:
            return False, f"Network error: {str(e)}"
        except Exception as e:
            # Boundary handler: callers rely on a (bool, str) result.
            return False, f"Validation error: {str(e)}"

    def parse_json_response(self, response_text: str) -> Tuple[bool, str]:
        """Parse the JSON response from BIR system.

        Falls back to ``parse_html_response`` when the text is not JSON.

        Returns:
            ``(True, message)`` only for status ``RECORD_MATCHED``; every other
            status, or an unexpected payload shape, yields ``(False, message)``.
        """
        try:
            data = json.loads(response_text)
        except json.JSONDecodeError:
            # If not JSON, treat as HTML response
            return self.parse_html_response(response_text)
        except TypeError as e:
            return False, f"Error parsing response: {str(e)}"

        if not isinstance(data, dict):
            kind = type(data).__name__
            return False, f"Error parsing response: unexpected JSON payload of type {kind}"

        # `status` may be missing or null; normalise both to an empty string.
        status = str(data.get('status') or '').upper()
        if status == 'RECORD_MATCHED':
            rdo = data.get('RDO') or 'Unknown RDO'
            return True, f"✅ TIN is VALID and registered under {rdo}"
        if status == 'RECORD_NOT_MATCHED':
            return False, "❌ The information provided does not match the records in our database"
        return False, f"❌ Unable to process request. Status: {status}"

    def parse_html_response(self, html_content: str) -> Tuple[bool, str]:
        """Fallback HTML parsing if JSON parsing fails.

        The page text is lower-cased and searched for known phrases; success
        phrases are checked before failure phrases.

        Returns:
            ``(True, message)`` when a success phrase is present, otherwise
            ``(False, message)``.
        """
        try:
            soup = BeautifulSoup(html_content, 'html.parser')
            text_content = soup.get_text().lower()

            # Look for success indicators
            if any(phrase in text_content for phrase in _HTML_SUCCESS_PHRASES):
                # Try to extract RDO information
                rdo_match = _RDO_PATTERN.search(text_content)
                rdo_info = rdo_match.group(1) if rdo_match else "Unknown RDO"
                return True, f"✅ TIN validation successful - {rdo_info}"

            # Look for failure indicators
            if any(phrase in text_content for phrase in _HTML_FAILURE_PHRASES):
                return False, "❌ Information does not match BIR database records"

            # If no clear indicators, return the response
            return False, f"Unclear response: {text_content[:200]}..."
        except Exception as e:
            return False, f"Error parsing HTML response: {str(e)}"

    def format_birthdate_html(self, birthdate: str) -> Optional[str]:
        """Format birthdate to HTML date format (YYYY-MM-DD).

        Hyphens are treated as slashes and spaces are dropped, then
        MM/DD/YYYY, DD/MM/YYYY and YYYY/MM/DD are tried in that order.

        Returns:
            The date as ``YYYY-MM-DD``, or None when the value is not a string
            or matches none of the layouts.
        """
        if not isinstance(birthdate, str):
            logger.error("Error formatting birthdate: expected str, got %s",
                         type(birthdate).__name__)
            return None

        clean_date = birthdate.replace("-", "/").replace(" ", "").strip()
        for fmt in _BIRTHDATE_FORMATS:
            try:
                date_obj = datetime.strptime(clean_date, fmt)
            except ValueError:
                continue
            return date_obj.strftime("%Y-%m-%d")  # HTML date format

        # If all formats fail
        logger.warning("Could not parse birthdate: %s", birthdate)
        return None


def generate_name_combinations(first_name: str, middle_name: str, last_name: str) -> List[NameCombination]:
    """Generate prioritized name combinations with and without middle name.

    Order of the result: the split-middle variant (only for a multi-word
    middle name), then the no-middle variants, then the middle-name
    variants. Duplicates and entries lacking a first or last name are
    dropped. With no middle name, only the plain "First | '' | Last" entry
    is produced, so no request is spent on an empty or "." middle name.

    Args:
        first_name: Given name; surrounding whitespace is stripped.
        middle_name: Middle name; may be empty or None.
        last_name: Surname; surrounding whitespace is stripped.

    Returns:
        The distinct combinations, each with status "Pending".
    """
    first_name = (first_name or "").strip()
    middle_name = (middle_name or "").strip()
    last_name = (last_name or "").strip()

    combinations: List[NameCombination] = []
    seen = set()

    def add(desc: str, first: str, middle: str, last: str) -> None:
        # Append the variant unless it is incomplete or already queued.
        key = f"{first}|{middle}|{last}"
        if key in seen or not first or not last:
            return
        seen.add(key)
        combinations.append(NameCombination(
            description=desc,
            first_name=first,
            middle_name=middle,
            last_name=last,
        ))

    # Split middle name if it contains multiple words: the first word moves
    # to the first name, the rest stays as the middle name.
    middle_parts = middle_name.split()
    if len(middle_parts) >= 2:
        add(
            "Split middle: First + first middle word | Remaining middle | Last",
            f"{first_name} {middle_parts[0]}",
            " ".join(middle_parts[1:]),
            last_name,
        )

    # 1. Try no-middle combinations first
    add("No middle: First | '' | Last", first_name, "", last_name)

    if not middle_name:
        # Every remaining variant is derived from the middle name.
        return combinations

    add(
        "Combined First + Middle (no middle field): 'First Middle' | '' | Last",
        f"{first_name} {middle_name}",
        "",
        last_name,
    )

    # 2. Try middle name variations
    initial = middle_name[0]
    dotted = middle_name if middle_name.endswith(".") else f"{middle_name}."
    add("Standard: First | Middle | Last", first_name, middle_name, last_name)
    add("Middle Initial: First | M | Last", first_name, initial, last_name)
    add("Middle Initial with dot: First | M. | Last", first_name, f"{initial}.", last_name)
    add("Middle with dot: First | Middle. | Last", first_name, dotted, last_name)
    add(
        "Middle without dot: First | Middle(no dot) | Last",
        first_name,
        middle_name.replace(".", ""),
        last_name,
    )
    return combinations


def validate_single_combination(tin_id: str, birthdate: str, gender: str, combination: NameCombination) -> NameCombination:
    """Validate a single name combination.

    A new TINValidatorWebScraper is created for the call, and the given
    ``combination`` is updated in place with status, result and error text.

    Returns:
        The same ``combination`` object that was passed in.
    """
    validator = TINValidatorWebScraper()
    try:
        # Add delay to be respectful to the server
        time.sleep(3)  # Increased delay
        logger.info(f"Testing combination: {combination.description}")
        is_valid, message = validator.validate_tin(
            tin_id,
            combination.first_name,
            combination.middle_name,
            combination.last_name,
            birthdate,
            gender
        )
        combination.status = "Success" if is_valid else "Failed"
        combination.result = message
        combination.error_message = "" if is_valid else message
        logger.info(f"Result for {combination.description}: {'SUCCESS' if is_valid else 'FAILED'}")
    except Exception as e:
        combination.status = "Error"
        combination.result = ""
        combination.error_message = str(e)
        # NOTE(review): the rest of this handler (error log and
        # `return combination`) is on the next, still-unformatted source line.
logger.error(f"Error testing {combination.description}: {e}") return combination def create_html_table(table_data: List[List[str]]) -> str: """Create HTML table from table data""" if not table_data: return "
No results to display
" headers = ["Description", "First Name", "Middle Name", "Last Name", "Status", "Result"] html = """| {header} | ' html += """
|---|
| {display_text} | ' html += '
Detailed results table will appear here...
", label="Detailed Results" ) # Event handlers def validate_and_update(first, middle, last, tin, birth, gender_val): try: # Initial progress update yield ( "🔄 Starting TIN validation...\nConnecting to BIR system...", "### 🔄 Processing...\nPlease wait while we validate your information.", "Validation in progress...
" ) # Perform validation table_data, summary, success, html_table = validate_tin_sequential( first, middle, last, tin, birth, gender_val ) if success: yield ( "✅ Validation completed successfully!", summary, html_table ) else: yield ( "❌ Validation completed - no matches found", summary, html_table if html_table else "No detailed results available
" ) except Exception as e: error_msg = f"❌ An error occurred during validation:\n{str(e)}" yield ( error_msg, f"### ❌ Error\n```\n{str(e)}\n```\n\nPlease check your input and try again.", "An error occurred during validation. Please try again.
" ) validate_btn.click( fn=validate_and_update, inputs=[first_name, middle_name, last_name, tin_id, birthdate, gender], outputs=[progress_output, summary_output, results_table] ) # # Example usage # gr.Markdown("### 📋 Example Input") # gr.Examples( # examples=[ # ["Juan", "Santos", "Dela Cruz", "123456789", "01-15-1990", "Male"], # ["Maria", "Carmen", "Rodriguez", "987654321", "12-25-1985", "Female"], # ["Jose", "", "Rizal", "111222333", "06-19-1861", "Male"], # ], # inputs=[first_name, middle_name, last_name, tin_id, birthdate, gender], # ) # gr.Markdown(""" # ### 🔧 How it Works: # 1. **Connects to BIR System** - Uses the official validation endpoint # 2. **Tests Name Variations** - Tries different formatting combinations # 3. **JSON Response Parsing** - Analyzes the server's response properly # 4. **Sequential Processing** - One request at a time with delays # ### 📋 Input Tips: # - **TIN**: Enter exactly 9 digits (remove hyphens) # - **Names**: Use the exact spelling from your registration # - **Date**: MM-DD-YYYY format works best # - **Patience**: Process takes 2-5 minutes due to rate limiting # ### ⚠️ Troubleshooting: # - If validation fails, check your TIN and personal information # - Ensure birthdate is in correct format # - Verify name spelling matches your BIR registration # - Try again if server is temporarily unavailable # """) return demo # Launch the app if __name__ == "__main__": demo = create_interface() demo.launch(server_name="0.0.0.0", server_port=7860, share=True)