import logging
import re
from datetime import datetime

import gradio as gr
import numpy as np
import pytesseract
import torch
from PIL import Image
from transformers import ViTImageProcessor, ViTForImageClassification
# Single source of truth for the checkpoint, so the preprocessing config and
# the classifier weights can never drift apart.
MODEL_NAME = 'google/vit-base-patch16-224'

# Loaded once at import time; analyze_with_vit() reuses these module-level
# objects for every request.
processor = ViTImageProcessor.from_pretrained(MODEL_NAME)
model = ViTForImageClassification.from_pretrained(MODEL_NAME)
def extract_text_from_image(image):
    """Extract text from a certificate image using Tesseract OCR.

    Args:
        image: Image to read; passed unchanged to
            ``pytesseract.image_to_string``.

    Returns:
        The recognised text. If OCR raises, the string
        ``"OCR Error: <message>"`` is returned instead of propagating the
        exception, so the caller can still render a report.
    """
    try:
        return pytesseract.image_to_string(image)
    except Exception as e:
        # Boundary handler: the error is surfaced to the caller as text, and
        # the traceback is logged so the failure can be diagnosed later.
        logging.getLogger(__name__).exception("OCR failed")
        return f"OCR Error: {str(e)}"
# Date shapes recognised in certificate text. The digit guards
# ((?<!\d) / (?!\d)) stop a pattern from matching inside a longer number,
# e.g. reporting "24-01-15" out of "2024-01-15"; \b keeps the month pattern
# from starting in the middle of a word.
_DATE_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        # 15/01/2024, 1-5-24
        r'(?<!\d)\d{1,2}[-/]\d{1,2}[-/]\d{2,4}(?!\d)',
        # 2024-01-15
        r'(?<!\d)\d{4}[-/]\d{1,2}[-/]\d{1,2}(?!\d)',
        # Jan 15, 2024 / January 15 2024
        r'\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{1,2},? \d{4}(?!\d)',
    )
)


def extract_dates(text):
    """Extract date-like substrings from text.

    Args:
        text: Text to scan. ``None`` or an empty string yields no dates.

    Returns:
        A list of the matched substrings exactly as they appear in ``text``,
        grouped by pattern: numeric day/month-first dates, then year-first
        dates, then "Month DD, YYYY" dates.
    """
    if not text:
        return []

    dates = []
    for pattern in _DATE_PATTERNS:
        dates.extend(pattern.findall(text))
    return dates
def analyze_with_vit(image):
    """Run the image through the ViT classifier and report its top prediction.

    Args:
        image: Image to classify. PIL images in a mode other than RGB are
            converted to RGB before preprocessing.

    Returns:
        A ``(confidence, predicted_class)`` tuple: the highest softmax
        probability expressed as a percentage, and the index of the
        top-scoring class.
    """
    # NOTE(review): the value returned here is the classifier's confidence in
    # its top class. Nothing in this function measures image quality or
    # certificate authenticity -- confirm this is the intended signal.

    # Grayscale / RGBA inputs would not match the 3-channel input the
    # processor normalises for, so bring PIL images to RGB first.
    if getattr(image, "mode", "RGB") != "RGB":
        image = image.convert("RGB")

    inputs = processor(images=image, return_tensors="pt")

    # Inference only: no gradients needed.
    with torch.no_grad():
        logits = model(**inputs).logits

    probs = torch.nn.functional.softmax(logits, dim=-1)
    confidence = torch.max(probs).item() * 100
    predicted_class = logits.argmax(-1).item()

    return confidence, predicted_class
def _normalize(value):
    """Lower-case *value* and collapse runs of whitespace; None becomes ''."""
    return " ".join((value or "").lower().split())


def _contains_keyword(phrase, text):
    """Return True if a meaningful word of *phrase* occurs in *text*.

    Both arguments are expected to be normalized (lower-case, single spaces).
    Words longer than three characters are matched as substrings. If the
    phrase has no such word, its short words must match as whole words.
    """
    words = phrase.split()
    long_words = [word for word in words if len(word) > 3]
    if long_words:
        return any(word in text for word in long_words)
    # Only short words (e.g. "AI", "SQL"): a substring test would also hit
    # inside unrelated words ("certain"), so require a whole-word match.
    return any(
        re.search(rf"(?<!\w){re.escape(word)}(?!\w)", text) for word in words
    )


def compare_data(extracted_text, user_name, user_course, user_date, user_issuer):
    """Compare OCR text with the details the user expects on the certificate.

    Matching is case-insensitive and ignores differences in whitespace, so a
    name that OCR split across two lines still matches. Fields left blank
    (or ``None``) are skipped and do not affect the score.

    Args:
        extracted_text: Text read from the certificate.
        user_name: Expected full name; must appear as a whole phrase.
        user_course: Expected course name; one matching keyword is enough.
        user_date: Expected date; matched literally, also with ``-`` and
            ``/`` swapped.
        user_issuer: Expected issuer; one matching keyword is enough.

    Returns:
        A ``(matches, issues, score)`` tuple. ``matches`` maps ``'name'``,
        ``'course'``, ``'date'`` and ``'issuer'`` to booleans. ``issues`` is
        a list of ``(emoji, message, status)`` tuples with status ``"good"``,
        ``"bad"`` or ``"warning"``. ``score`` starts at 100 and loses 25 for
        a missing name, 20 for course, 20 for date and 15 for issuer.
    """
    matches = {
        'name': False,
        'course': False,
        'date': False,
        'issuer': False
    }
    issues = []
    score = 100

    text = _normalize(extracted_text)

    name = _normalize(user_name)
    if name:
        if name in text:
            matches['name'] = True
            issues.append(("✅", "Name match found", "good"))
        else:
            issues.append(("❌", f"Name '{user_name.strip()}' NOT found in certificate", "bad"))
            score -= 25

    course = _normalize(user_course)
    if course:
        if _contains_keyword(course, text):
            matches['course'] = True
            issues.append(("✅", "Course/Program match found", "good"))
        else:
            issues.append(("❌", f"Course '{user_course.strip()}' NOT found in certificate", "bad"))
            score -= 20

    date = _normalize(user_date)
    if date:
        # Accept either separator style for numeric dates.
        variants = {date, date.replace('-', '/'), date.replace('/', '-')}
        if any(variant in text for variant in variants):
            matches['date'] = True
            issues.append(("✅", f"Date '{user_date.strip()}' verified", "good"))
        else:
            # Only needed for the hint in the message, so look the dates up
            # on the failure path alone.
            found_dates = extract_dates(extracted_text or "")
            shown = ', '.join(found_dates[:3]) if found_dates else 'None'
            issues.append(("⚠️", f"Date '{user_date.strip()}' NOT found (Found: {shown})", "warning"))
            score -= 20

    issuer = _normalize(user_issuer)
    if issuer:
        if _contains_keyword(issuer, text):
            matches['issuer'] = True
            issues.append(("✅", f"Issuer '{user_issuer.strip()}' verified", "good"))
        else:
            issues.append(("❌", f"Issuer '{user_issuer.strip()}' NOT found in certificate", "bad"))
            score -= 15

    return matches, issues, max(0, score)
def validate_certificate(image, user_name, user_course, user_date, user_issuer):
    """Validate a certificate image against the details the user expects.

    Runs OCR and the ViT classifier on the image, compares the OCR text with
    the supplied details, and blends both signals into a score and verdict.

    Args:
        image: PIL image, or an array accepted by ``Image.fromarray``.
            ``None`` produces an "upload an image" message.
        user_name: Expected name on the certificate.
        user_course: Expected course or program name.
        user_date: Expected issue date.
        user_issuer: Expected issuing organization.

    Returns:
        A 4-tuple ``(report, extracted_text, json_output, final_score)``:
        the Markdown report, the raw OCR text, a dict of machine-readable
        results, and the integer final score.
    """
    if image is None:
        return "❌ Please upload an image", "", {}, 0

    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    extracted_text = extract_text_from_image(image)
    vit_confidence, vit_class = analyze_with_vit(image)

    # extract_text_from_image reports failures as an "OCR Error: ..." string.
    # Compare against empty text in that case so words from the error
    # message cannot be mistaken for certificate content.
    ocr_failed = extracted_text.startswith("OCR Error:")
    matches, comparison_issues, comparison_score = compare_data(
        "" if ocr_failed else extracted_text,
        user_name, user_course, user_date, user_issuer
    )

    # Weighted blend: 40% classifier confidence, 60% data-match score.
    final_score = int((vit_confidence * 0.4) + (comparison_score * 0.6))

    details_supplied = any(
        (value or "").strip()
        for value in (user_name, user_course, user_date, user_issuer)
    )

    if not details_supplied:
        # With nothing to compare, the data-match score stays at 100 by
        # default; that must not be reported as a passed verification.
        verdict = "⚠️ VERIFICATION NEEDED"
        verdict_color = "🟡"
        verdict_detail = (
            "No expected details were provided, so nothing on the "
            "certificate could be checked."
        )
    elif final_score >= 70 and comparison_score >= 70:
        verdict = "✅ CERTIFICATE VALID"
        verdict_color = "🟢"
        verdict_detail = "All verification checks passed. Certificate appears authentic."
    elif final_score >= 50:
        verdict = "⚠️ VERIFICATION NEEDED"
        verdict_color = "🟡"
        verdict_detail = "Some discrepancies found. Manual verification recommended."
    else:
        verdict = "❌ CERTIFICATE INVALID"
        verdict_color = "🔴"
        verdict_detail = "Multiple verification failures. Certificate likely fake or incorrect."

    text_preview = extracted_text[:500] + ('...' if len(extracted_text) > 500 else '')
    summary = {
        field: "✅ Verified" if matches[field] else "❌ Not Found"
        for field in ('name', 'course', 'date', 'issuer')
    }

    # Built line by line so the Markdown does not depend on source
    # indentation. Scores and findings are list items so each renders on its
    # own line instead of collapsing into one paragraph.
    report_lines = [
        f"# {verdict_color} {verdict}",
        "",
        f"- **Final Score:** {final_score}/100",
        f"- **ViT Model Confidence:** {vit_confidence:.1f}%",
        f"- **Data Match Score:** {comparison_score}/100",
        "",
        "---",
        "",
        "## 📋 Verification Results",
        "",
        "### Data Comparison:",
        "",
        *(f"- {emoji} {issue}" for emoji, issue, _status in comparison_issues),
        "",
        "---",
        "",
        "## 📄 Extracted Certificate Text:",
        "```",
        text_preview,
        "```",
        "",
        "---",
        "",
        "## 🤖 AI Model Analysis:",
        "- **Model:** Google Vision Transformer (ViT)",
        "- **Architecture:** ViT-Base-Patch16-224",
        f"- **Image Quality Score:** {vit_confidence:.1f}%",
        f"- **Classification:** Class {vit_class}",
        "",
        "---",
        "",
        "## ⚖️ Final Verdict:",
        verdict_detail,
        "",
        "### Match Summary:",
        f"- Name: {summary['name']}",
        f"- Course: {summary['course']}",
        f"- Date: {summary['date']}",
        f"- Issuer: {summary['issuer']}",
        "",
        "---",
        "",
        "*⚠️ Disclaimer: This is an automated verification system. For legal purposes,",
        "please verify with the issuing authority.*",
        "",
    ]
    report = "\n".join(report_lines)

    json_output = {
        "verdict": verdict,
        "final_score": final_score,
        "vit_confidence": round(vit_confidence, 2),
        "data_match_score": comparison_score,
        "matches": matches,
        "extracted_text_preview": extracted_text[:200]
    }

    return report, extracted_text, json_output, final_score
# Gradio UI: inputs on the left, validation results on the right, with a
# single button wired to validate_certificate().
with gr.Blocks(theme=gr.themes.Soft(), title="AI Certificate Validator") as demo:

    gr.Markdown("""
    # 🛡️ AI-Powered Certificate Validation System

    ### Powered by Google's Vision Transformer (ViT) + OCR

    Upload a certificate image and provide the expected details. The AI will:
    1. Extract text using OCR (Optical Character Recognition)
    2. Analyze image quality using ViT deep learning model
    3. Compare extracted data with your provided information
    4. Generate a comprehensive validation report
    """)

    with gr.Row():
        # Left column: the certificate image and the details to check.
        with gr.Column(scale=1):
            gr.Markdown("## 📤 Upload Certificate")
            image_input = gr.Image(
                label="Certificate Image",
                type="pil",
                sources=["upload", "clipboard", "webcam"]
            )

            gr.Markdown("## 📝 Expected Certificate Details")

            user_name = gr.Textbox(
                label="Full Name (as on certificate)",
                placeholder="e.g., John Smith",
                lines=1
            )

            user_course = gr.Textbox(
                label="Course/Program Name",
                placeholder="e.g., Machine Learning Certification",
                lines=1
            )

            user_date = gr.Textbox(
                label="Issue Date",
                placeholder="e.g., 2024-01-15 or Jan 15, 2024",
                lines=1
            )

            user_issuer = gr.Textbox(
                label="Issuing Organization",
                placeholder="e.g., Stanford University",
                lines=1
            )

            validate_btn = gr.Button("🔍 Validate Certificate", variant="primary", size="lg")

            gr.Markdown("""
            ### 💡 Tips:
            - Ensure certificate image is clear and readable
            - Provide exact details as they appear on certificate
            - Date format: YYYY-MM-DD or Month DD, YYYY
            """)

        # Right column: the outputs of validate_certificate().
        with gr.Column(scale=1):
            gr.Markdown("## 📊 Validation Report")

            report_output = gr.Markdown(label="Analysis Report")

            score_output = gr.Number(
                label="Final Validation Score",
                precision=0
            )

            with gr.Accordion("📄 Extracted Text (OCR)", open=False):
                extracted_text_output = gr.Textbox(
                    label="Raw Extracted Text",
                    lines=10,
                    max_lines=20
                )

            with gr.Accordion("🔧 Technical Details (JSON)", open=False):
                json_output = gr.JSON(label="Detailed Results")

    # The output order must match the tuple returned by validate_certificate:
    # (report, extracted_text, json_output, final_score).
    validate_btn.click(
        fn=validate_certificate,
        inputs=[image_input, user_name, user_course, user_date, user_issuer],
        outputs=[report_output, extracted_text_output, json_output, score_output]
    )

    gr.Markdown("""
    ---

    ## 🎯 How It Works:

    1. **Image Upload**: Certificate image is uploaded
    2. **OCR Processing**: Tesseract extracts all text from image
    3. **ViT Analysis**: Google's Vision Transformer analyzes image quality
    4. **Data Matching**: Compares extracted text with user-provided details
    5. **Scoring**: Combines AI confidence + data match accuracy
    6. **Verdict**: Generates final validation report

    ## 🔧 Technology Stack:
    - **AI Model**: Google Vision Transformer (ViT-Base-Patch16-224)
    - **OCR Engine**: Tesseract OCR
    - **Framework**: Hugging Face Transformers + Gradio
    - **Deployment**: Hugging Face Spaces

    ## 📌 Use Cases:
    - Academic certificate verification
    - Professional credential validation
    - Employment background checks
    - Document fraud detection

    ---

    **🏆 Created for Hackathon Demo**
    *For production use, integrate with official verification APIs*
    """)
# Start the web app only when this file is run as a script, not on import.
if __name__ == "__main__":
    # share=True asks Gradio to also create a public share link
    # (see the Gradio launch() documentation).
    demo.launch(share=True)