# tin-checker / app.py
# Uploaded by heoliday ("Upload app.py", commit 74fbd58, verified)
import gradio as gr
import requests
from bs4 import BeautifulSoup
import time
import re
import json
from datetime import datetime
from typing import List, Dict, Tuple, Optional
import concurrent.futures
from dataclasses import dataclass
import logging
# Setup logging: configure the root logger at INFO level and create the
# module-scoped logger used throughout this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@dataclass
class NameCombination:
    """One first/middle/last name variant to submit, plus the outcome of testing it."""

    # Human-readable label of the formatting rule that produced this variant.
    description: str
    first_name: str
    # May be an empty string when the variant omits the middle name.
    middle_name: str
    last_name: str
    # "Pending" until tested; validate_single_combination sets it to
    # "Success", "Failed" or "Error".
    status: str = "Pending"
    # Message returned by the validator for this variant.
    result: str = ""
    # Failure message, or the exception text when testing raised.
    error_message: str = ""
class TINValidatorWebScraper:
    """Client for the BIR TIN validation web form at revie.bir.gov.ph.

    The validator keeps one ``requests.Session`` so cookies obtained from the
    landing page are sent with the form submission. All public methods report
    problems through their return value instead of raising.
    """

    # Accepted birthdate layouts, tried in this order. Hyphens are converted
    # to slashes before parsing, so only slash-separated layouts are needed.
    # NOTE: an ambiguous date such as 01/02/1990 is read as MM/DD (January 2).
    _BIRTHDATE_FORMATS = (
        "%m/%d/%Y",  # MM/DD/YYYY
        "%d/%m/%Y",  # DD/MM/YYYY
        "%Y/%m/%d",  # YYYY/MM/DD
    )

    # Lower-case phrases searched for in an HTML (non-JSON) response.
    _SUCCESS_PHRASES = (
        'valid and registered', 'record matched', 'successfully verified',
        'tin provided is valid', 'validation successful',
    )
    _FAILURE_PHRASES = (
        'does not match', 'record not matched', 'cannot process',
        'not found', 'invalid information', 'regret to inform',
    )
    _RDO_PATTERN = re.compile(r'(rdo\d+[^.]*)', re.IGNORECASE)

    def __init__(self):
        self.session = requests.Session()
        # Accept-Encoding is deliberately NOT overridden: the session default
        # only advertises encodings the installed stack can decode, whereas a
        # hard-coded "br" yields an unreadable body when no brotli decoder is
        # installed.
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.9',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
        })
        # Updated URLs based on the HTML of the current BIR page
        self.base_url = "https://revie.bir.gov.ph/tin-validation"
        self.submit_url = "https://revie.bir.gov.ph/submitTinValidation"

    def get_initial_page(self) -> bool:
        """Load the TIN validation landing page to establish the session.

        Returns:
            True when the page loaded with a non-error status, False otherwise.
        """
        try:
            response = self.session.get(self.base_url, timeout=30)
            response.raise_for_status()
        except Exception as e:
            logger.error("Error loading initial page: %s", e)
            return False
        logger.info("Successfully loaded initial page")
        return True

    def validate_tin(self, tin_id: str, first_name: str, middle_name: str,
                     last_name: str, birthdate: str, gender: str) -> Tuple[bool, str]:
        """Validate a TIN against the BIR system.

        Inputs are checked locally first, so malformed data never costs a
        network round-trip.

        Args:
            tin_id: TIN; hyphens and spaces are ignored, 9 ASCII digits required.
            first_name: Given name (sent stripped and upper-cased).
            middle_name: Middle name; may be empty.
            last_name: Family name (sent stripped and upper-cased).
            birthdate: Date in any layout accepted by ``format_birthdate_html``.
            gender: Gender label, sent upper-cased (e.g. 'MALE' or 'FEMALE').

        Returns:
            ``(is_valid, message)``; never raises.
        """
        try:
            # Clean TIN - remove any formatting
            clean_tin = tin_id.replace("-", "").replace(" ", "").strip()
            # isascii() rejects non-ASCII digits that str.isdigit() accepts.
            if not (clean_tin.isascii() and clean_tin.isdigit()) or len(clean_tin) != 9:
                return False, f"TIN must be exactly 9 digits (got: {len(clean_tin)} digits)"

            # Format birthdate to YYYY-MM-DD (HTML date format)
            formatted_birthdate = self.format_birthdate_html(birthdate)
            if not formatted_birthdate:
                return False, "Invalid birthdate format. Please use MM-DD-YYYY or DD-MM-YYYY"

            # Field names follow the form on the BIR page
            form_data = {
                'tin': clean_tin,
                'firstName': first_name.strip().upper(),
                'middleName': middle_name.strip().upper() if middle_name else '',
                'lastName': last_name.strip().upper(),
                'gender': gender.strip().upper(),  # 'MALE' or 'FEMALE'
                'birthdate': formatted_birthdate,
            }

            # Load initial page to establish session only once inputs are sane
            if not self.get_initial_page():
                return False, "Failed to establish session with BIR website"

            # Personal data (TIN, name, birthdate) is intentionally not logged.
            logger.info("Submitting validation request to %s", self.submit_url)
            headers = {
                'Content-Type': 'application/x-www-form-urlencoded',
                'X-Requested-With': 'XMLHttpRequest',  # Important for AJAX requests
                'Referer': self.base_url,
                'Origin': 'https://revie.bir.gov.ph',
            }
            response = self.session.post(
                self.submit_url,
                data=form_data,
                headers=headers,
                timeout=60,
            )
            logger.info("Response status: %s", response.status_code)
            logger.debug("Response content: %s...", response.text[:500])

            if response.status_code == 200:
                return self.parse_json_response(response.text)
            return False, f"HTTP Error: {response.status_code} - {response.text[:200]}"
        except requests.RequestException as e:
            return False, f"Network error: {str(e)}"
        except Exception as e:
            return False, f"Validation error: {str(e)}"

    def parse_json_response(self, response_text: str) -> Tuple[bool, str]:
        """Parse the JSON response from the BIR system.

        Falls back to ``parse_html_response`` when the body is not JSON.

        Returns:
            ``(True, message)`` for status RECORD_MATCHED, otherwise
            ``(False, message)``.
        """
        try:
            data = json.loads(response_text)
        except json.JSONDecodeError:
            # If not JSON, treat as HTML response
            return self.parse_html_response(response_text)

        if not isinstance(data, dict):
            return False, f"Error parsing response: unexpected JSON payload of type {type(data).__name__}"

        # A missing or null status is treated as an empty status string.
        status = str(data.get('status') or '').upper()
        if status == 'RECORD_MATCHED':
            rdo = data.get('RDO') or 'Unknown RDO'
            return True, f"βœ… TIN is VALID and registered under {rdo}"
        if status == 'RECORD_NOT_MATCHED':
            return False, "❌ The information provided does not match the records in our database"
        return False, f"❌ Unable to process request. Status: {status}"

    def parse_html_response(self, html_content: str) -> Tuple[bool, str]:
        """Fallback: classify an HTML response by searching for known phrases.

        Returns:
            ``(True, message)`` when a success phrase is found, ``(False,
            message)`` for a failure phrase or an unrecognised page.
        """
        try:
            soup = BeautifulSoup(html_content, 'html.parser')
            text_content = soup.get_text().lower()

            # Look for success indicators
            if any(phrase in text_content for phrase in self._SUCCESS_PHRASES):
                # Try to extract RDO information
                rdo_match = self._RDO_PATTERN.search(text_content)
                rdo_info = rdo_match.group(1) if rdo_match else "Unknown RDO"
                return True, f"βœ… TIN validation successful - {rdo_info}"

            # Look for failure indicators
            if any(phrase in text_content for phrase in self._FAILURE_PHRASES):
                return False, "❌ Information does not match BIR database records"

            # If no clear indicators, return the response
            return False, f"Unclear response: {text_content[:200]}..."
        except Exception as e:
            return False, f"Error parsing HTML response: {str(e)}"

    def format_birthdate_html(self, birthdate: str) -> Optional[str]:
        """Convert a user-entered birthdate to HTML date format (YYYY-MM-DD).

        Hyphens are treated as slashes and spaces are ignored. Layouts are
        tried in the order MM/DD/YYYY, DD/MM/YYYY, YYYY/MM/DD.

        Returns:
            The ISO date string, or None when the value cannot be parsed.
        """
        if not isinstance(birthdate, str):
            logger.warning("Could not parse birthdate: %r", birthdate)
            return None

        clean_date = birthdate.replace("-", "/").replace(" ", "").strip()
        for fmt in self._BIRTHDATE_FORMATS:
            try:
                return datetime.strptime(clean_date, fmt).strftime("%Y-%m-%d")
            except ValueError:
                continue

        # If all formats fail
        logger.warning(f"Could not parse birthdate: {birthdate}")
        return None
def generate_name_combinations(first_name: str, middle_name: str, last_name: str) -> List[NameCombination]:
    """Generate prioritized, de-duplicated name combinations to try.

    Order of the returned variants:
      1. (multi-word middle name only) first middle word moved into the first name
      2. no middle name
      3. (middle name given) first + middle merged into the first-name field
      4. (middle name given) standard, initial, initial with dot, middle with
         trailing dot, middle with dots removed

    Variants that depend on a middle name are skipped entirely when it is
    empty, so no request is wasted on a first name with a trailing space or a
    middle name consisting of a lone ".".

    Args:
        first_name: Given name; surrounding whitespace is ignored.
        middle_name: Middle name; may be empty or None.
        last_name: Family name; surrounding whitespace is ignored.

    Returns:
        List of ``NameCombination`` in priority order; empty when the first or
        last name is blank.
    """
    first_name = (first_name or "").strip()
    middle_name = (middle_name or "").strip()
    last_name = (last_name or "").strip()

    if not first_name or not last_name:
        return []

    # (description, first, middle) in priority order; the last name is constant.
    candidates = []

    # Split middle name if it contains multiple words
    middle_parts = middle_name.split()
    if len(middle_parts) >= 2:
        candidates.append((
            "Split middle: First + first middle word | Remaining middle | Last",
            f"{first_name} {middle_parts[0]}",
            " ".join(middle_parts[1:]),
        ))

    # 1. Try no-middle combinations first
    candidates.append(("No middle: First | '' | Last", first_name, ""))

    if middle_name:
        initial = middle_name[0]
        dotted_middle = middle_name if middle_name.endswith(".") else f"{middle_name}."
        candidates.extend([
            (
                "Combined First + Middle (no middle field): 'First Middle' | '' | Last",
                f"{first_name} {middle_name}",
                "",
            ),
            # 2. Try middle name variations
            ("Standard: First | Middle | Last", first_name, middle_name),
            ("Middle Initial: First | M | Last", first_name, initial),
            ("Middle Initial with dot: First | M. | Last", first_name, f"{initial}."),
            ("Middle with dot: First | Middle. | Last", first_name, dotted_middle),
            (
                "Middle without dot: First | Middle(no dot) | Last",
                first_name,
                middle_name.replace(".", ""),
            ),
        ])

    combinations = []
    seen = set()
    for description, first, middle in candidates:
        # The validator upper-cases names before submitting, so variants that
        # differ only by case would produce the same request.
        key = (first.upper(), middle.upper())
        if key in seen:
            continue
        seen.add(key)
        combinations.append(NameCombination(
            description=description,
            first_name=first,
            middle_name=middle,
            last_name=last_name,
        ))
    return combinations
def validate_single_combination(tin_id: str, birthdate: str, gender: str,
                                combination: NameCombination,
                                delay_seconds: float = 3) -> NameCombination:
    """Validate a single name combination and record the outcome on it.

    Args:
        tin_id: TIN to validate.
        birthdate: Birthdate as entered by the user.
        gender: Gender label as entered by the user.
        combination: Name variant to test; updated in place.
        delay_seconds: Pause before the request, to be respectful to the
            server. Defaults to the original 3 seconds.

    Returns:
        The same ``combination`` object with ``status`` set to "Success",
        "Failed" or "Error" and ``result`` / ``error_message`` filled in.
    """
    validator = TINValidatorWebScraper()
    try:
        # Add delay to be respectful to the server
        time.sleep(delay_seconds)
        logger.info(f"Testing combination: {combination.description}")
        is_valid, message = validator.validate_tin(
            tin_id, combination.first_name, combination.middle_name,
            combination.last_name, birthdate, gender
        )
        combination.status = "Success" if is_valid else "Failed"
        combination.result = message
        combination.error_message = "" if is_valid else message
        logger.info(f"Result for {combination.description}: {'SUCCESS' if is_valid else 'FAILED'}")
    except Exception as e:
        combination.status = "Error"
        combination.result = ""
        combination.error_message = str(e)
        logger.error(f"Error testing {combination.description}: {e}")
    finally:
        # A fresh session is created per call; release its pooled connections.
        validator.session.close()
    return combination
def create_html_table(table_data: List[List[str]]) -> str:
    """Render validation results as a scrollable, colour-coded HTML table.

    Cell values originate from user input and from the remote server's
    response, so every value is HTML-escaped before being placed in the markup
    or in the ``title`` attribute.

    Args:
        table_data: Rows of ``[description, first, middle, last, status,
            result, ...]``. Only the first six columns are shown; index 4
            (status) selects the row colour.

    Returns:
        HTML markup, or a placeholder paragraph when there are no rows.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    from html import escape

    if not table_data:
        return "<p>No results to display</p>"

    headers = ["Description", "First Name", "Middle Name", "Last Name", "Status", "Result"]
    status_colors = {
        "Success": "#d4edda",  # Light green for success
        "Failed": "#f8d7da",   # Light red for failed
        "Error": "#fff3cd",    # Light yellow for error
    }

    parts = [
        "\n"
        '<div style="overflow-x: auto; max-height: 600px; border: 1px solid #ddd;">\n'
        '<table style="width: 100%; border-collapse: collapse; margin: 0;">\n'
        '<thead style="position: sticky; top: 0; background: #f8f9fa;">\n'
        "<tr>\n"
    ]

    # Add headers
    parts.extend(
        f'<th style="border: 1px solid #ddd; padding: 12px; text-align: left; font-weight: bold; background: #f8f9fa;">{escape(header)}</th>'
        for header in headers
    )
    parts.append("\n</tr>\n</thead>\n<tbody>\n")

    # Add data rows
    for row in table_data:
        # Rows too short to carry a status stay white.
        status = row[4] if len(row) >= 5 else None
        bg_color = status_colors.get(status, "#ffffff")
        parts.append(f'<tr style="background-color: {bg_color};">')
        # Only show first 6 columns (exclude error column for cleaner display)
        for cell in row[:6]:
            full_text = str(cell) if cell else ""
            # Truncate before escaping so an entity is never cut in half.
            display_text = full_text[:80] + "..." if len(full_text) > 80 else full_text
            parts.append(
                '<td style="border: 1px solid #ddd; padding: 8px; word-wrap: break-word; max-width: 200px;" '
                f'title="{escape(full_text)}">{escape(display_text)}</td>'
            )
        parts.append('</tr>')

    parts.append("\n</tbody>\n</table>\n</div>\n")
    return "".join(parts)
def _build_validation_summary(successful: "Optional[NameCombination]",
                              tested: "List[NameCombination]",
                              total_combinations: int,
                              tin_id: str, gender: str, birthdate: str) -> str:
    """Build the Markdown summary shown after a validation run."""
    successful_count = sum(1 for combo in tested if combo.status == "Success")
    failed_count = sum(1 for combo in tested if combo.status == "Failed")
    error_count = sum(1 for combo in tested if combo.status == "Error")

    stats = [
        "πŸ“Š **Summary:**",
        f"- βœ… Successful: {successful_count}",
        f"- ❌ Failed: {failed_count}",
        f"- ⚠️ Errors: {error_count}",
        f"- πŸ“‹ Total Tested: {len(tested)}/{total_combinations}",
    ]

    if successful is not None:
        lines = [
            "πŸŽ‰ **TIN VALIDATION SUCCESSFUL!**",
            f"βœ… **Matching Combination:** {successful.description}",
            f"πŸ“ **Details:** {successful.result}",
            *stats,
            "πŸ†” **Validated Information:**",
            f"- **TIN:** {tin_id}",
            f"- **Name:** {successful.first_name} {successful.middle_name} {successful.last_name}",
            f"- **Gender:** {gender}",
            f"- **Birthdate:** {birthdate}",
        ]
    else:
        lines = [
            "❌ **No successful TIN validation found**",
            *stats,
            "πŸ’‘ **Possible reasons:**",
            "- Personal information doesn't match BIR database exactly",
            "- TIN may not be registered or active",
            "- Name formatting differences",
            "- Website temporarily unavailable",
            "- Data entry errors",
            "πŸ”§ **Suggestions:**",
            "- Double-check TIN number (must be 9 digits)",
            "- Verify name spelling and format",
            "- Check birthdate format (MM-DD-YYYY)",
            "- Try again later if server issues",
        ]
    return "\n" + "\n".join(lines) + "\n"


def validate_tin_sequential(first_name: str, middle_name: str, last_name: str,
                            tin_id: str, birthdate: str, gender: str) -> Tuple[List[List[str]], str, bool, str]:
    """Test name combinations one at a time until the BIR system reports a match.

    All inputs are validated locally (required fields, TIN shape, birthdate
    format) before any combination is tried, because every attempt costs a
    delay plus network requests.

    Args:
        first_name: Given name (required).
        middle_name: Middle name (optional; may be empty or None).
        last_name: Family name (required).
        tin_id: TIN; hyphens and spaces are ignored, 9 digits required.
        birthdate: Birthdate as entered by the user (required).
        gender: Gender label (required).

    Returns:
        ``(table_data, summary, success, html_table)`` where ``table_data``
        holds one row per tested combination, ``summary`` is Markdown,
        ``success`` tells whether a match was found and ``html_table`` is the
        rendered table. On input errors the rows and table are empty and
        ``summary`` carries the error message.
    """
    # UI components may hand over None for an untouched field; treat as blank.
    first_name, middle_name, last_name, tin_id, birthdate, gender = (
        (value or "").strip()
        for value in (first_name, middle_name, last_name, tin_id, birthdate, gender)
    )

    # Input validation
    if not all([first_name, last_name, tin_id, birthdate, gender]):
        return [], "❌ Please fill in all required fields", False, ""

    # Validate TIN format
    clean_tin = tin_id.replace("-", "").replace(" ", "").strip()
    if not clean_tin.isdigit() or len(clean_tin) != 9:
        return [], f"❌ TIN must be exactly 9 digits (current: {len(clean_tin)} digits)", False, ""

    # Reject an unparseable birthdate once, up front, instead of paying a
    # delay and a request for every combination only to get the same error.
    date_probe = TINValidatorWebScraper()
    try:
        birthdate_ok = bool(date_probe.format_birthdate_html(birthdate))
    finally:
        date_probe.session.close()
    if not birthdate_ok:
        return [], "❌ Invalid birthdate format. Please use MM-DD-YYYY or DD-MM-YYYY", False, ""

    # Generate combinations
    combinations = generate_name_combinations(first_name, middle_name, last_name)
    total_combinations = len(combinations)
    if total_combinations == 0:
        return [], "❌ Could not generate valid name combinations", False, ""
    logger.info(f"Generated {total_combinations} name combinations")

    # Sequential validation
    tested = []
    successful_combination = None
    for position, pending in enumerate(combinations, start=1):
        logger.info(f"Testing combination {position}/{total_combinations}: {pending.description}")
        combo = validate_single_combination(tin_id, birthdate, gender, pending)
        tested.append(combo)
        if combo.status == "Success":
            successful_combination = combo
            logger.info(f"βœ… Found successful combination: {combo.description}")
            break  # Stop on first success

    # Create results table data (only combinations that were actually tested)
    table_data = [
        [
            combo.description,
            combo.first_name,
            combo.middle_name,
            combo.last_name,
            combo.status,
            combo.result,
            combo.error_message,
        ]
        for combo in tested
    ]
    html_table = create_html_table(table_data)

    summary = _build_validation_summary(
        successful_combination, tested, total_combinations, tin_id, gender, birthdate
    )
    return table_data, summary, successful_combination is not None, html_table
# Create Gradio interface
def create_interface():
    """Build the Gradio Blocks UI for the TIN validator.

    The left column collects the personal details and holds the submit
    button; the right column shows a status line, a Markdown summary and the
    HTML results table. The returned app is not launched here.

    Returns:
        The configured ``gr.Blocks`` instance.
    """

    def validate_and_update(first, middle, last, tin, birth, gender_val):
        """Run the validation, yielding (status, summary, table) UI updates."""
        try:
            # Initial progress update, shown while the slow validation runs
            yield (
                "πŸ”„ Starting TIN validation...\nConnecting to BIR system...",
                "### πŸ”„ Processing...\nPlease wait while we validate your information.",
                "<p style='text-align: center; color: #666;'>Validation in progress...</p>"
            )
            # Perform validation
            table_data, summary, success, html_table = validate_tin_sequential(
                first, middle, last, tin, birth, gender_val
            )
            if success:
                yield (
                    "βœ… Validation completed successfully!",
                    summary,
                    html_table
                )
            else:
                yield (
                    "❌ Validation completed - no matches found",
                    summary,
                    html_table if html_table else "<p>No detailed results available</p>"
                )
        except Exception as e:
            # Top-level UI boundary: record the traceback, show a short message.
            logger.exception("Unhandled error during validation")
            error_msg = f"❌ An error occurred during validation:\n{str(e)}"
            yield (
                error_msg,
                f"### ❌ Error\n```\n{str(e)}\n```\n\nPlease check your input and try again.",
                "<p style='color: red;'>An error occurred during validation. Please try again.</p>"
            )

    with gr.Blocks(title="TIN Validator - Updated", theme=gr.themes.Soft()) as demo:
        gr.Markdown(
            "\n"
            "# πŸ†” Philippine TIN Validator (AAI Data Operation Team)\n"
            "### Validate Tax Identification Numbers using the official BIR system\n"
            "This tool has been updated to work with the current BIR validation system.\n"
            "It tests multiple name formatting combinations to find a match.\n"
            "⚠️ **Important**: This tool respects the BIR server with appropriate delays between requests.\n"
        )
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### πŸ“ Required Information")
                first_name = gr.Textbox(
                    label="First Name *",
                    placeholder="Juan",
                    info="Enter your first name exactly as registered"
                )
                # No asterisk: the middle name is optional, as its info text says.
                middle_name = gr.Textbox(
                    label="Middle Name",
                    placeholder="Santos",
                    info="Leave blank if you don't have a middle name"
                )
                last_name = gr.Textbox(
                    label="Last Name *",
                    placeholder="Dela Cruz",
                    info="Enter your last name exactly as registered"
                )
                tin_id = gr.Textbox(
                    label="TIN Number *",
                    placeholder="123456789",
                    info="9-digit number only (no hyphens)"
                )
                birthdate = gr.Textbox(
                    label="Birth Date *",
                    placeholder="01-15-1990 or 15-01-1990",
                    info="MM-DD-YYYY or DD-MM-YYYY format"
                )
                gender = gr.Radio(
                    choices=["Male", "Female"],
                    label="Gender *",
                    value="Male",
                    info="Select your gender as registered"
                )
                validate_btn = gr.Button(
                    "πŸ” Validate TIN",
                    variant="primary",
                    size="lg"
                )
                gr.Markdown(
                    "\n"
                    "### ⏱️ Process Info:\n"
                    "- **Processing time**: 2-5 minutes\n"
                    "- **Rate limiting**: 3 seconds between attempts\n"
                    "- **Smart stopping**: Stops when match is found\n"
                    "- **Multiple formats**: Tests various name combinations\n"
                )
            with gr.Column(scale=2):
                gr.Markdown("### πŸ“Š Validation Results")
                progress_output = gr.Textbox(
                    label="Status",
                    value="Ready to validate - click the button to start",
                    interactive=False,
                    lines=2
                )
                summary_output = gr.Markdown(value="Results will appear here after validation...")
                results_table = gr.HTML(
                    value="<p>Detailed results table will appear here...</p>",
                    label="Detailed Results"
                )

        # Event handlers
        validate_btn.click(
            fn=validate_and_update,
            inputs=[first_name, middle_name, last_name, tin_id, birthdate, gender],
            outputs=[progress_output, summary_output, results_table]
        )

        # # Example usage
        # gr.Markdown("### πŸ“‹ Example Input")
        # gr.Examples(
        # examples=[
        # ["Juan", "Santos", "Dela Cruz", "123456789", "01-15-1990", "Male"],
        # ["Maria", "Carmen", "Rodriguez", "987654321", "12-25-1985", "Female"],
        # ["Jose", "", "Rizal", "111222333", "06-19-1861", "Male"],
        # ],
        # inputs=[first_name, middle_name, last_name, tin_id, birthdate, gender],
        # )
        # gr.Markdown("""
        # ### πŸ”§ How it Works:
        # 1. **Connects to BIR System** - Uses the official validation endpoint
        # 2. **Tests Name Variations** - Tries different formatting combinations
        # 3. **JSON Response Parsing** - Analyzes the server's response properly
        # 4. **Sequential Processing** - One request at a time with delays
        # ### πŸ“‹ Input Tips:
        # - **TIN**: Enter exactly 9 digits (remove hyphens)
        # - **Names**: Use the exact spelling from your registration
        # - **Date**: MM-DD-YYYY format works best
        # - **Patience**: Process takes 2-5 minutes due to rate limiting
        # ### ⚠️ Troubleshooting:
        # - If validation fails, check your TIN and personal information
        # - Ensure birthdate is in correct format
        # - Verify name spelling matches your BIR registration
        # - Try again if server is temporarily unavailable
        # """)
    return demo
# Launch the app when this file is run as a script (not when imported).
if __name__ == "__main__":
    demo = create_interface()
    # Serve on 0.0.0.0:7860 and request a share link.
    # NOTE(review): share=True presumably exposes the app through a public
    # Gradio link — confirm that is intended for a tool handling personal data.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)