# NOTE: The lines originally here were file-viewer residue, not program text:
# a "File size: 11,316 Bytes" banner, a blame gutter cycling through commits
# a392ba6, 9602ad4, cb2f4b6 and 39d17aa, and the line-number column 1-342.
# They are condensed into this comment so the module parses as Python.
import gradio as gr
from transformers import ViTImageProcessor, ViTForImageClassification
from PIL import Image
import torch
import pytesseract
import re
from datetime import datetime
import numpy as np

# Instantiate the pretrained ViT preprocessor and classifier once, at import
# time; analyze_with_vit() reads both module-level objects on every call.
_VIT_CHECKPOINT = 'google/vit-base-patch16-224'
processor = ViTImageProcessor.from_pretrained(_VIT_CHECKPOINT)
model = ViTForImageClassification.from_pretrained(_VIT_CHECKPOINT)

def extract_text_from_image(image):
    """Run Tesseract OCR on *image* and return the recognised text.

    Any exception raised by the OCR call is caught and reported in-band as a
    string of the form ``"OCR Error: <message>"`` instead of propagating.
    """
    try:
        recognised = pytesseract.image_to_string(image)
    except Exception as err:
        return f"OCR Error: {err!s}"
    return recognised

# Date patterns, compiled once at import time instead of on every call.
# The digit look-arounds keep a pattern from matching in the middle of a
# longer digit run; without them "2024-01-15" also produced the bogus
# partial match "24-01-15".
_DATE_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        # 15/01/2024, 1-2-24
        r'(?<!\d)\d{1,2}[-/]\d{1,2}[-/]\d{2,4}(?!\d)',
        # 2024-01-15, 2024/1/5
        r'(?<!\d)\d{4}[-/]\d{1,2}[-/]\d{1,2}(?!\d)',
        # Jan 15, 2024 / January 15 2024
        r'(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{1,2},? \d{4}(?!\d)',
    )
)


def extract_dates(text):
    """Return every date-like substring found in *text*.

    Args:
        text: Text to scan (typically OCR output). ``None`` or an empty
            string yields an empty list.

    Returns:
        A list of the matched substrings exactly as they appear in *text*.
        Results are grouped by pattern (numeric day-first, numeric
        year-first, then month-name dates), not by position in the text.
    """
    if not text:
        return []

    dates = []
    for pattern in _DATE_PATTERNS:
        dates.extend(pattern.findall(text))
    return dates

def analyze_with_vit(image):
    """Run the module-level ViT classifier on *image*.

    The returned confidence is simply the highest softmax probability the
    classifier assigns to any of its classes; nothing in this function
    measures certificate authenticity or image quality directly.
    NOTE(review): the checkpoint is presumably a general-purpose image
    classifier - confirm against its model card before relying on this
    value as a quality signal.

    Args:
        image: A PIL image (or any input the processor accepts).

    Returns:
        A ``(confidence, predicted_class)`` tuple: the top-1 probability as
        a percentage (float, 0-100) and the index of the top-1 class (int).
    """
    # RGBA, palette and greyscale PIL images are converted to 3-channel RGB
    # before preprocessing so the processor always sees the same layout.
    if isinstance(image, Image.Image) and image.mode != "RGB":
        image = image.convert("RGB")

    inputs = processor(images=image, return_tensors="pt")

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**inputs).logits

        # Top-1 probability, expressed as a percentage
        probs = torch.nn.functional.softmax(logits, dim=-1)
        confidence = torch.max(probs).item() * 100

        # Index of the highest-scoring class
        predicted_class = logits.argmax(-1).item()

    return confidence, predicted_class

def _normalize_for_match(value):
    """Lower-case *value* and collapse each whitespace run to one space."""
    return " ".join((value or "").lower().split())


def _phrase_in_text(phrase, haystack):
    """Return True if *phrase* (already normalised) is present in *haystack*.

    Any single word longer than three characters occurring in *haystack*
    counts as a match. When the phrase has no such word (e.g. "mit"), the
    whole phrase must occur as a standalone token so short acronyms can
    still be verified without matching inside longer words.
    """
    significant = [word for word in phrase.split() if len(word) > 3]
    if significant:
        # NOTE(review): one shared word (e.g. "university") is enough here;
        # consider requiring all significant words for a stricter check.
        return any(word in haystack for word in significant)
    standalone = r'(?<!\w)' + re.escape(phrase) + r'(?!\w)'
    return re.search(standalone, haystack) is not None


def compare_data(extracted_text, user_name, user_course, user_date, user_issuer):
    """Compare OCR text against the details the user expects to find.

    All comparisons are case-insensitive and ignore differences in
    whitespace (leading/trailing blanks, OCR line breaks). A field left
    blank is skipped: it is neither matched nor penalised.

    Args:
        extracted_text: Text recovered from the certificate image.
        user_name: Expected holder name; must appear as a whole phrase.
        user_course: Expected course/program name (keyword match).
        user_date: Expected issue date; matched as a substring, also with
            ``-`` and ``/`` separators swapped.
        user_issuer: Expected issuing organisation (keyword match).

    Returns:
        A ``(matches, issues, score)`` tuple where ``matches`` maps
        ``'name'``/``'course'``/``'date'``/``'issuer'`` to booleans,
        ``issues`` is a list of ``(emoji, message, status)`` tuples and
        ``score`` starts at 100 and loses 25/20/20/15 points for a failed
        name/course/date/issuer check (never below 0).
    """
    matches = {
        'name': False,
        'course': False,
        'date': False,
        'issuer': False
    }

    issues = []
    score = 100

    text_norm = _normalize_for_match(extracted_text)

    # Check Name
    name = _normalize_for_match(user_name)
    if name:
        if name in text_norm:
            matches['name'] = True
            issues.append(("✅", "Name match found", "good"))
        else:
            issues.append(("❌", f"Name '{user_name}' NOT found in certificate", "bad"))
            score -= 25

    # Check Course/Program
    course = _normalize_for_match(user_course)
    if course:
        if _phrase_in_text(course, text_norm):
            matches['course'] = True
            issues.append(("✅", "Course/Program match found", "good"))
        else:
            issues.append(("❌", f"Course '{user_course}' NOT found in certificate", "bad"))
            score -= 20

    # Check Date
    date = _normalize_for_match(user_date)
    if date:
        # Every date the extractor can return is itself a substring of the
        # text, so a case-insensitive substring test (plus the two
        # separator variants) covers the extractor-based check as well.
        variants = {date, date.replace('-', '/'), date.replace('/', '-')}
        if any(variant in text_norm for variant in variants):
            matches['date'] = True
            issues.append(("✅", f"Date '{user_date}' verified", "good"))
        else:
            # Only needed to tell the user which dates were actually seen.
            extracted_dates = extract_dates(extracted_text or "")
            found = ', '.join(extracted_dates[:3]) if extracted_dates else 'None'
            issues.append(("⚠️", f"Date '{user_date}' NOT found (Found: {found})", "warning"))
            score -= 20

    # Check Issuer/Organization
    issuer = _normalize_for_match(user_issuer)
    if issuer:
        if _phrase_in_text(issuer, text_norm):
            matches['issuer'] = True
            issues.append(("✅", f"Issuer '{user_issuer}' verified", "good"))
        else:
            issues.append(("❌", f"Issuer '{user_issuer}' NOT found in certificate", "bad"))
            score -= 15

    return matches, issues, max(0, score)

# Prefix extract_text_from_image() uses to report an OCR failure in-band.
_OCR_ERROR_PREFIX = "OCR Error:"


def _decide_verdict(final_score, comparison_score, any_provided):
    """Return ``(verdict, colour_emoji, detail)`` for the given scores."""
    if not any_provided:
        # Nothing was compared, so a perfect data score proves nothing.
        return (
            "⚠️ VERIFICATION NEEDED",
            "🟡",
            "No expected details were provided, so nothing could be checked "
            "against the certificate text.",
        )
    if final_score >= 70 and comparison_score >= 70:
        return (
            "✅ CERTIFICATE VALID",
            "🟢",
            "All verification checks passed. Certificate appears authentic.",
        )
    if final_score >= 50:
        return (
            "⚠️ VERIFICATION NEEDED",
            "🟡",
            "Some discrepancies found. Manual verification recommended.",
        )
    return (
        "❌ CERTIFICATE INVALID",
        "🔴",
        "Multiple verification failures. Certificate likely fake or incorrect.",
    )


def _match_label(provided, matched):
    """Return the Match Summary label for one field."""
    if not provided:
        return "➖ Not Provided"
    return "✅ Verified" if matched else "❌ Not Found"


def validate_certificate(image, user_name, user_course, user_date, user_issuer):
    """Validate a certificate image against the details the user expects.

    Pipeline: OCR the image, run the ViT classifier, compare the OCR text
    with the supplied details, blend both scores (40% classifier
    confidence, 60% data match) and render a Markdown report.

    Args:
        image: PIL image or array-like accepted by ``Image.fromarray``;
            ``None`` when nothing was uploaded.
        user_name: Expected holder name (may be blank).
        user_course: Expected course/program (may be blank).
        user_date: Expected issue date (may be blank).
        user_issuer: Expected issuing organisation (may be blank).

    Returns:
        ``(report_markdown, extracted_text, json_output, final_score)``.
        When no image is supplied, or OCR fails, the score is 0 and the
        report explains why no verification was performed.
    """
    if image is None:
        return "❌ Please upload an image", "", {}, 0

    # Convert to PIL Image if needed
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    # Treat missing text inputs as blank so the checks below never see None.
    user_name = user_name or ""
    user_course = user_course or ""
    user_date = user_date or ""
    user_issuer = user_issuer or ""

    # Step 1: Extract text using OCR
    extracted_text = extract_text_from_image(image)
    if extracted_text.startswith(_OCR_ERROR_PREFIX):
        # The string is an error message, not certificate text; scoring the
        # user's details against it would be meaningless.
        failure_report = (
            "# 🔴 ❌ OCR FAILED\n\n"
            "Text could not be extracted from the image, so no verification "
            "was performed.\n\n"
            f"```\n{extracted_text}\n```\n"
        )
        failure_json = {"verdict": "❌ OCR FAILED", "error": extracted_text}
        return failure_report, extracted_text, failure_json, 0

    # Step 2: Run the ViT classifier.
    # NOTE(review): this is the classifier's top-1 confidence; whether it is
    # a meaningful "image quality" signal should be confirmed.
    vit_confidence, vit_class = analyze_with_vit(image)

    # Step 3: Compare extracted data with user data
    matches, comparison_issues, comparison_score = compare_data(
        extracted_text, user_name, user_course, user_date, user_issuer
    )
    provided = {
        'name': bool(user_name.strip()),
        'course': bool(user_course.strip()),
        'date': bool(user_date.strip()),
        'issuer': bool(user_issuer.strip()),
    }

    # Step 4: Calculate final score
    # Weight: 40% ViT confidence, 60% data matching
    final_score = int((vit_confidence * 0.4) + (comparison_score * 0.6))

    # Step 5: Generate verdict
    verdict, verdict_color, verdict_detail = _decide_verdict(
        final_score, comparison_score, any(provided.values())
    )

    # OCR text is untrusted: neutralise fence markers so it cannot break out
    # of the fenced block in the report.
    preview = extracted_text[:500] + ('...' if len(extracted_text) > 500 else '')
    preview = preview.replace("```", "'''")

    # Create detailed report (trailing double spaces are Markdown line breaks)
    report_lines = [
        "",
        f"# {verdict_color} {verdict}",
        "",
        f"**Final Score:** {final_score}/100  ",
        f"**ViT Model Confidence:** {vit_confidence:.1f}%  ",
        f"**Data Match Score:** {comparison_score}/100",
        "",
        "---",
        "",
        "## 📊 Verification Results",
        "",
        "### Data Comparison:",
        "",
    ]
    report_lines.extend(f"{emoji} {issue}" for emoji, issue, _status in comparison_issues)
    report_lines.extend([
        "",
        "---",
        "",
        "## 🔍 Extracted Certificate Text:",
        "```",
        preview,
        "```",
        "",
        "---",
        "",
        "## 🤖 AI Model Analysis:",
        "- **Model:** Google Vision Transformer (ViT)",
        "- **Architecture:** ViT-Base-Patch16-224",
        f"- **Image Quality Score:** {vit_confidence:.1f}%",
        f"- **Classification:** Class {vit_class}",
        "",
        "---",
        "",
        "## ⚖️ Final Verdict:",
        verdict_detail,
        "",
        "### Match Summary:",
        f"- Name: {_match_label(provided['name'], matches['name'])}",
        f"- Course: {_match_label(provided['course'], matches['course'])}",
        f"- Date: {_match_label(provided['date'], matches['date'])}",
        f"- Issuer: {_match_label(provided['issuer'], matches['issuer'])}",
        "",
        "---",
        "",
        "*⚠️ Disclaimer: This is an automated verification system. For legal purposes, ",
        "please verify with the issuing authority.*",
        "",
    ])
    report = "\n".join(report_lines)

    # Create JSON output
    json_output = {
        "verdict": verdict,
        "final_score": final_score,
        "vit_confidence": round(vit_confidence, 2),
        "data_match_score": comparison_score,
        "matches": matches,
        "fields_provided": provided,
        "extracted_text_preview": extracted_text[:200]
    }

    return report, extracted_text, json_output, final_score


# Create Gradio Interface
# Two-column layout: inputs (image + expected details) on the left, results
# (report, score, raw OCR text, JSON) on the right. Emoji in the labels below
# were restored from mis-decoded (mojibake) text.
with gr.Blocks(theme=gr.themes.Soft(), title="AI Certificate Validator") as demo:

    gr.Markdown("""
    # 🛡️ AI-Powered Certificate Validation System

    ### Powered by Google's Vision Transformer (ViT) + OCR

    Upload a certificate image and provide the expected details. The AI will:
    1. Extract text using OCR (Optical Character Recognition)
    2. Analyze image quality using ViT deep learning model
    3. Compare extracted data with your provided information
    4. Generate a comprehensive validation report
    """)

    with gr.Row():
        # Left column: certificate upload and the details to verify
        with gr.Column(scale=1):
            gr.Markdown("## 📤 Upload Certificate")
            image_input = gr.Image(
                label="Certificate Image",
                type="pil",
                sources=["upload", "clipboard", "webcam"]
            )

            gr.Markdown("## 📝 Expected Certificate Details")

            user_name = gr.Textbox(
                label="Full Name (as on certificate)",
                placeholder="e.g., John Smith",
                lines=1
            )

            user_course = gr.Textbox(
                label="Course/Program Name",
                placeholder="e.g., Machine Learning Certification",
                lines=1
            )

            user_date = gr.Textbox(
                label="Issue Date",
                placeholder="e.g., 2024-01-15 or Jan 15, 2024",
                lines=1
            )

            user_issuer = gr.Textbox(
                label="Issuing Organization",
                placeholder="e.g., Stanford University",
                lines=1
            )

            validate_btn = gr.Button("🔍 Validate Certificate", variant="primary", size="lg")

            gr.Markdown("""
            ### 💡 Tips:
            - Ensure certificate image is clear and readable
            - Provide exact details as they appear on certificate
            - Date format: YYYY-MM-DD or Month DD, YYYY
            """)

        # Right column: outputs filled in by validate_certificate()
        with gr.Column(scale=1):
            gr.Markdown("## 📋 Validation Report")

            report_output = gr.Markdown(label="Analysis Report")

            score_output = gr.Number(
                label="Final Validation Score",
                precision=0
            )

            with gr.Accordion("📄 Extracted Text (OCR)", open=False):
                extracted_text_output = gr.Textbox(
                    label="Raw Extracted Text",
                    lines=10,
                    max_lines=20
                )

            with gr.Accordion("🔧 Technical Details (JSON)", open=False):
                json_output = gr.JSON(label="Detailed Results")

    # Connect button to function; the outputs list follows the order of the
    # 4-tuple returned by validate_certificate (report, text, json, score).
    validate_btn.click(
        fn=validate_certificate,
        inputs=[image_input, user_name, user_course, user_date, user_issuer],
        outputs=[report_output, extracted_text_output, json_output, score_output]
    )

    gr.Markdown("""
    ---

    ## 🎯 How It Works:

    1. **Image Upload**: Certificate image is uploaded
    2. **OCR Processing**: Tesseract extracts all text from image
    3. **ViT Analysis**: Google's Vision Transformer analyzes image quality
    4. **Data Matching**: Compares extracted text with user-provided details
    5. **Scoring**: Combines AI confidence + data match accuracy
    6. **Verdict**: Generates final validation report

    ## 🔧 Technology Stack:
    - **AI Model**: Google Vision Transformer (ViT-Base-Patch16-224)
    - **OCR Engine**: Tesseract OCR
    - **Framework**: Hugging Face Transformers + Gradio
    - **Deployment**: Hugging Face Spaces

    ## 📊 Use Cases:
    - Academic certificate verification
    - Professional credential validation
    - Employment background checks
    - Document fraud detection

    ---

    **🚀 Created for Hackathon Demo**  
    *For production use, integrate with official verification APIs*
    """)

# Launch the app
# The server is started only when this file is executed as a script, not when
# it is imported as a module.
# NOTE(review): share=True presumably asks Gradio for a publicly reachable
# share link - confirm that is intended for this deployment.
if __name__ == "__main__":
    demo.launch(share=True)