ANISA09 committed on
Commit
9602ad4
·
verified ·
1 Parent(s): 5e164bd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +333 -61
app.py CHANGED
@@ -1,70 +1,342 @@
1
- import os
2
- import uuid
3
  import gradio as gr
4
- from inference_sdk import InferenceHTTPClient
 
 
 
 
 
 
5
 
6
- # Ensure uploads folder exists
7
- UPLOAD_DIR = "uploads"
8
- os.makedirs(UPLOAD_DIR, exist_ok=True)
9
 
10
- # Initialize Roboflow inference client
11
- CLIENT = InferenceHTTPClient(
12
- api_url="https://serverless.roboflow.com",
13
- api_key="i22FWkifZzD236Hhg56U" # ⚠️ Replace with your actual API key if different
14
- )
 
 
15
 
16
- # Model ID (Project Slug + Version)
17
- MODEL_ID = "detecting-fake-certificates-bj1x6/3"
 
 
 
 
 
 
 
 
 
 
 
 
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
- def analyze_certificate(image):
21
- """Save uploaded image locally and send it to Roboflow for inference."""
22
- try:
23
- # Save image with unique filename
24
- filename = f"{uuid.uuid4()}.jpg"
25
- save_path = os.path.join(UPLOAD_DIR, filename)
26
- image.save(save_path)
27
- print(f"[INFO] Image saved at {save_path}")
28
-
29
- # Perform inference using Roboflow model
30
- result = CLIENT.infer(save_path, model_id=MODEL_ID)
31
- print("[INFO] Inference result:", result)
32
-
33
- # Parse predictions
34
- predictions = result.get("predictions", [])
35
- if not predictions:
36
- return "⚠️ No objects detected β€” possibly a valid certificate.", save_path
37
-
38
- # Build readable output
39
- output_lines = []
40
- for pred in predictions:
41
- cls = pred.get("class", "unknown")
42
- conf = round(pred.get("confidence", 0) * 100, 2)
43
- output_lines.append(f"- {cls} ({conf}% confidence)")
44
-
45
- output_text = "βœ… **Detections:**\n" + "\n".join(output_lines)
46
- return output_text, save_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
- except Exception as e:
49
- print("[ERROR]", e)
50
- return f"❌ Error during inference: {e}", None
51
-
52
-
53
- # Gradio interface
54
- demo = gr.Interface(
55
- fn=analyze_certificate,
56
- inputs=gr.Image(type="pil", label="Upload a Certificate"),
57
- outputs=[
58
- gr.Textbox(label="Inference Result"),
59
- gr.Image(label="Uploaded Image")
60
- ],
61
- title="Fake Certificate Detector 🧠",
62
- description=(
63
- "Upload a certificate image β€” this app will use a trained Roboflow model "
64
- "(`detecting-fake-certificates-bj1x6/3`) to detect possible signs of forgery."
65
- ),
66
- allow_flagging="never"
67
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
 
69
  if __name__ == "__main__":
70
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
1
  import gradio as gr
2
+ from transformers import ViTImageProcessor, ViTForImageClassification
3
+ from PIL import Image
4
+ import torch
5
+ import pytesseract
6
+ import re
7
+ from datetime import datetime
8
+ import numpy as np
9
 
10
# Load the pretrained Vision Transformer checkpoint and its matching image
# preprocessor once, at import time, so every request reuses the same objects.
# The checkpoint name is spelled once so the two can never drift apart.
_VIT_CHECKPOINT = 'google/vit-base-patch16-224'
processor = ViTImageProcessor.from_pretrained(_VIT_CHECKPOINT)
model = ViTForImageClassification.from_pretrained(_VIT_CHECKPOINT)
13
 
14
def extract_text_from_image(image):
    """Run Tesseract OCR on *image* and return the recognized text.

    Args:
        image: The image handed unchanged to
            ``pytesseract.image_to_string``.

    Returns:
        The recognized text. If OCR raises for any reason, a string of the
        form ``"OCR Error: <message>"`` is returned instead of propagating
        the exception, so callers always receive a ``str``.
    """
    try:
        return pytesseract.image_to_string(image)
    except Exception as e:
        # Deliberate best-effort boundary: the failure is reported as text.
        # Callers can recognize it by the "OCR Error: " prefix.
        return f"OCR Error: {e}"
21
 
22
# Date shapes recognized in OCR text, compiled once at import time.
# The digit look-arounds stop a pattern from matching *inside* a longer run of
# digits: without them "2024-01-15" additionally produced the bogus "24-01-15".
_DATE_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        # 15-01-2024, 1/5/24
        r'(?<!\d)\d{1,2}[-/]\d{1,2}[-/]\d{2,4}(?!\d)',
        # 2024-01-15, 2024/1/5
        r'(?<!\d)\d{4}[-/]\d{1,2}[-/]\d{1,2}(?!\d)',
        # Jan 15, 2024 / January 15 2024
        r'\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{1,2},? \d{4}(?!\d)',
    )
)


def extract_dates(text):
    """Return every date-like substring found in *text*.

    Args:
        text: Text to scan. ``None`` or an empty string yields no dates.

    Returns:
        A list of the matched substrings exactly as they appear in *text*,
        grouped by pattern (numeric day-first, numeric year-first, then
        month-name dates) and in order of appearance within each group.
    """
    if not text:
        return []

    dates = []
    for pattern in _DATE_PATTERNS:
        dates.extend(pattern.findall(text))
    return dates
36
 
37
def analyze_with_vit(image):
    """Classify *image* with the module-level ViT ``model``.

    The image is preprocessed by ``processor``, run through ``model`` without
    gradient tracking, and the logits are converted to probabilities with a
    softmax.

    NOTE(review): the checkpoint loaded by this module
    (``google/vit-base-patch16-224``) appears to be a general-purpose image
    classifier, so the returned confidence is how sure the model is about its
    top class, not a measure of certificate authenticity. Confirm against the
    model card before relying on it as a quality signal.

    Args:
        image: Image to classify. PIL images in a non-RGB mode are converted
            to RGB first.

    Returns:
        A ``(confidence, predicted_class)`` tuple: the highest softmax
        probability as a percentage (0-100) and the index of the
        highest-scoring class.
    """
    # Alpha-channel, grayscale and palette images do not have the three
    # channels the preprocessor normalizes, so bring them to RGB up front.
    if isinstance(image, Image.Image) and image.mode != "RGB":
        image = image.convert("RGB")

    inputs = processor(images=image, return_tensors="pt")

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**inputs).logits

    probs = torch.nn.functional.softmax(logits, dim=-1)
    confidence = torch.max(probs).item() * 100
    predicted_class = logits.argmax(-1).item()

    return confidence, predicted_class
53
 
54
def _normalize_for_match(value):
    """Lowercase *value* and collapse each whitespace run to a single space."""
    return " ".join((value or "").lower().split())


def _phrase_found(phrase, haystack):
    """Return True if *phrase* (already normalized) is evidenced in *haystack*.

    Any word longer than three characters occurring in *haystack* counts as a
    match. When the phrase has no such word (e.g. "MBA", "MIT") the whole
    phrase is searched for as a standalone token instead; previously such
    phrases could never match.
    """
    significant = [word for word in phrase.split() if len(word) > 3]
    if significant:
        return any(word in haystack for word in significant)
    return re.search(rf"(?<!\w){re.escape(phrase)}(?!\w)", haystack) is not None


def compare_data(extracted_text, user_name, user_course, user_date, user_issuer):
    """Compare the user-supplied certificate details with the OCR text.

    Matching is case-insensitive and ignores differences in whitespace, so a
    name that OCR split across two lines still matches. Blank (or ``None``)
    details are skipped: they are neither matched nor penalized.

    Args:
        extracted_text: Text read from the certificate image.
        user_name: Expected holder name; must occur as a whole phrase.
        user_course: Expected course/program; matched word-wise.
        user_date: Expected issue date; ``-`` and ``/`` are interchangeable.
        user_issuer: Expected issuing organization; matched word-wise.

    Returns:
        A ``(matches, issues, score)`` tuple:

        * ``matches`` maps ``'name'``, ``'course'``, ``'date'`` and
          ``'issuer'`` to ``True`` only when that detail was found.
        * ``issues`` is a list of ``(emoji, message, status)`` tuples, one per
          supplied detail, with status ``"good"``, ``"bad"`` or ``"warning"``.
        * ``score`` starts at 100 and loses 25 (name), 20 (course), 20 (date)
          and 15 (issuer) for each supplied detail that was not found.
    """
    matches = {
        'name': False,
        'course': False,
        'date': False,
        'issuer': False
    }

    issues = []
    score = 100

    # One normalized copy of the OCR text serves every comparison below.
    text_norm = _normalize_for_match(extracted_text)

    # Check Name
    name = _normalize_for_match(user_name)
    if name:
        if name in text_norm:
            matches['name'] = True
            issues.append(("✅", "Name match found", "good"))
        else:
            issues.append(("❌", f"Name '{user_name}' NOT found in certificate", "bad"))
            score -= 25

    # Check Course/Program
    course = _normalize_for_match(user_course)
    if course:
        if _phrase_found(course, text_norm):
            matches['course'] = True
            issues.append(("✅", "Course/Program match found", "good"))
        else:
            issues.append(("❌", f"Course '{user_course}' NOT found in certificate", "bad"))
            score -= 20

    # Check Date
    date = _normalize_for_match(user_date)
    if date:
        extracted_dates = extract_dates(extracted_text)
        # Every extracted date is a substring of the text, so searching the
        # normalized text for each separator variant covers both checks.
        candidates = {date, date.replace('-', '/'), date.replace('/', '-')}

        if any(candidate in text_norm for candidate in candidates):
            matches['date'] = True
            issues.append(("✅", f"Date '{user_date}' verified", "good"))
        else:
            found = ', '.join(extracted_dates[:3]) if extracted_dates else 'None'
            issues.append(("⚠️", f"Date '{user_date}' NOT found (Found: {found})", "warning"))
            score -= 20

    # Check Issuer/Organization
    issuer = _normalize_for_match(user_issuer)
    if issuer:
        if _phrase_found(issuer, text_norm):
            matches['issuer'] = True
            issues.append(("✅", f"Issuer '{user_issuer}' verified", "good"))
        else:
            issues.append(("❌", f"Issuer '{user_issuer}' NOT found in certificate", "bad"))
            score -= 15

    return matches, issues, max(0, score)
119
 
120
# Prefix of the string extract_text_from_image returns when OCR fails.
_OCR_ERROR_PREFIX = "OCR Error:"


def _choose_verdict(final_score, comparison_score, any_detail_given):
    """Map the scores to a ``(verdict, color, detail)`` triple."""
    if not any_detail_given:
        # Nothing was compared, so a perfect match score proves nothing.
        return (
            "⚠️ VERIFICATION NEEDED",
            "🟡",
            "No expected details were provided, so nothing could be compared "
            "with the certificate text.",
        )
    if final_score >= 70 and comparison_score >= 70:
        return (
            "✅ CERTIFICATE VALID",
            "🟢",
            "All verification checks passed. Certificate appears authentic.",
        )
    if final_score >= 50:
        return (
            "⚠️ VERIFICATION NEEDED",
            "🟡",
            "Some discrepancies found. Manual verification recommended.",
        )
    return (
        "❌ CERTIFICATE INVALID",
        "🔴",
        "Multiple verification failures. Certificate likely fake or incorrect.",
    )


def _match_label(verified, provided):
    """Return the Match Summary label for one detail."""
    if verified:
        return "✅ Verified"
    return "❌ Not Found" if provided else "➖ Not provided"


def _build_report(verdict, verdict_color, verdict_detail, final_score,
                  vit_confidence, vit_class, comparison_score,
                  comparison_issues, extracted_text, matches, provided):
    """Render the Markdown validation report.

    Related facts are emitted as list items because Markdown joins lines
    separated by a single newline into one paragraph.
    """
    preview = extracted_text[:500] + ('...' if len(extracted_text) > 500 else '')

    if comparison_issues:
        comparison_lines = [f"- {emoji} {issue}" for emoji, issue, _status in comparison_issues]
    else:
        comparison_lines = ["- No expected details were provided."]

    lines = [
        "",
        f"# {verdict_color} {verdict}",
        "",
        f"- **Final Score:** {final_score}/100",
        f"- **ViT Model Confidence:** {vit_confidence:.1f}%",
        f"- **Data Match Score:** {comparison_score}/100",
        "",
        "---",
        "",
        "## 📊 Verification Results",
        "",
        "### Data Comparison:",
        "",
        *comparison_lines,
        "",
        "---",
        "",
        "## 🔍 Extracted Certificate Text:",
        "```",
        preview,
        "```",
        "",
        "---",
        "",
        "## 🤖 AI Model Analysis:",
        "- **Model:** Google Vision Transformer (ViT)",
        "- **Architecture:** ViT-Base-Patch16-224",
        f"- **Image Quality Score:** {vit_confidence:.1f}%",
        f"- **Classification:** Class {vit_class}",
        "",
        "---",
        "",
        "## ⚖️ Final Verdict:",
        verdict_detail,
        "",
        "### Match Summary:",
        f"- Name: {_match_label(matches['name'], provided['name'])}",
        f"- Course: {_match_label(matches['course'], provided['course'])}",
        f"- Date: {_match_label(matches['date'], provided['date'])}",
        f"- Issuer: {_match_label(matches['issuer'], provided['issuer'])}",
        "",
        "---",
        "",
        "*⚠️ Disclaimer: This is an automated verification system. For legal purposes,",
        "please verify with the issuing authority.*",
        "",
    ]
    return "\n".join(lines)


def validate_certificate(image, user_name, user_course, user_date, user_issuer):
    """Validate a certificate image against the details the user expects.

    Steps: OCR the image, classify it with the ViT model, compare the OCR text
    with the supplied details, blend the two scores (40% ViT confidence, 60%
    data match) and render a report.

    Args:
        image: PIL image, or an array accepted by ``Image.fromarray``;
            ``None`` when nothing was uploaded.
        user_name: Expected holder name (blank or ``None`` to skip).
        user_course: Expected course/program (blank or ``None`` to skip).
        user_date: Expected issue date (blank or ``None`` to skip).
        user_issuer: Expected issuing organization (blank or ``None`` to skip).

    Returns:
        A ``(report, extracted_text, json_output, final_score)`` tuple: the
        Markdown report, the raw OCR text, a dict of machine-readable results
        and the blended 0-100 score. Without an image the result is an error
        message with empty text, an empty dict and a score of 0. When OCR
        fails, the result is a "verification needed" report with a score of 0.
    """
    if image is None:
        return "❌ Please upload an image", "", {}, 0

    # Treat a missing detail as blank so the string handling below is safe.
    user_name, user_course, user_date, user_issuer = (
        value or "" for value in (user_name, user_course, user_date, user_issuer)
    )
    provided = {
        'name': bool(user_name.strip()),
        'course': bool(user_course.strip()),
        'date': bool(user_date.strip()),
        'issuer': bool(user_issuer.strip()),
    }

    # Convert to PIL Image if needed
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    # Step 1: Extract text using OCR
    extracted_text = extract_text_from_image(image)

    if extracted_text.startswith(_OCR_ERROR_PREFIX):
        # OCR itself failed. Scoring the error message as if it were the
        # certificate text would wrongly brand the certificate as fake.
        verdict = "⚠️ VERIFICATION NEEDED"
        report = "\n".join([
            "",
            f"# 🟡 {verdict}",
            "",
            "The text of the certificate could not be read, so no checks were run.",
            "",
            "```",
            extracted_text,
            "```",
            "",
        ])
        failure_output = {
            "verdict": verdict,
            "final_score": 0,
            "error": extracted_text,
        }
        return report, extracted_text, failure_output, 0

    # Step 2: Use ViT model for image analysis
    vit_confidence, vit_class = analyze_with_vit(image)

    # Step 3: Compare extracted data with user data
    matches, comparison_issues, comparison_score = compare_data(
        extracted_text, user_name, user_course, user_date, user_issuer
    )

    # Step 4: Calculate final score
    # Weight: 40% ViT confidence, 60% data matching
    final_score = int((vit_confidence * 0.4) + (comparison_score * 0.6))

    # Step 5: Generate verdict
    verdict, verdict_color, verdict_detail = _choose_verdict(
        final_score, comparison_score, any(provided.values())
    )

    report = _build_report(
        verdict, verdict_color, verdict_detail, final_score,
        vit_confidence, vit_class, comparison_score,
        comparison_issues, extracted_text, matches, provided,
    )

    # Create JSON output
    json_output = {
        "verdict": verdict,
        "final_score": final_score,
        "vit_confidence": round(vit_confidence, 2),
        "data_match_score": comparison_score,
        "matches": matches,
        "extracted_text_preview": extracted_text[:200]
    }

    return report, extracted_text, json_output, final_score
222
+
223
+
224
# Static Markdown shown in the UI. Kept at module level (column 0) so the text
# carries no accidental indentation and the layout code below stays readable.
_INTRO_MD = """
# 🛡️ AI-Powered Certificate Validation System

### Powered by Google's Vision Transformer (ViT) + OCR

Upload a certificate image and provide the expected details. The AI will:
1. Extract text using OCR (Optical Character Recognition)
2. Analyze image quality using ViT deep learning model
3. Compare extracted data with your provided information
4. Generate a comprehensive validation report
"""

_TIPS_MD = """
### 💡 Tips:
- Ensure certificate image is clear and readable
- Provide exact details as they appear on certificate
- Date format: YYYY-MM-DD or Month DD, YYYY
"""

_HOW_IT_WORKS_MD = """
---

## 🎯 How It Works:

1. **Image Upload**: Certificate image is uploaded
2. **OCR Processing**: Tesseract extracts all text from image
3. **ViT Analysis**: Google's Vision Transformer analyzes image quality
4. **Data Matching**: Compares extracted text with user-provided details
5. **Scoring**: Combines AI confidence + data match accuracy
6. **Verdict**: Generates final validation report

## 🔧 Technology Stack:
- **AI Model**: Google Vision Transformer (ViT-Base-Patch16-224)
- **OCR Engine**: Tesseract OCR
- **Framework**: Hugging Face Transformers + Gradio
- **Deployment**: Hugging Face Spaces

## 📊 Use Cases:
- Academic certificate verification
- Professional credential validation
- Employment background checks
- Document fraud detection

---

**🚀 Created for Hackathon Demo**
*For production use, integrate with official verification APIs*
"""

# Create Gradio Interface: inputs in the left column, results in the right.
with gr.Blocks(theme=gr.themes.Soft(), title="AI Certificate Validator") as demo:

    gr.Markdown(_INTRO_MD)

    with gr.Row():
        # Left column: the certificate image and the details to check it against.
        with gr.Column(scale=1):
            gr.Markdown("## 📤 Upload Certificate")
            image_input = gr.Image(
                label="Certificate Image",
                type="pil",
                sources=["upload", "clipboard", "webcam"]
            )

            gr.Markdown("## 📝 Expected Certificate Details")

            user_name = gr.Textbox(
                label="Full Name (as on certificate)",
                placeholder="e.g., John Smith",
                lines=1
            )

            user_course = gr.Textbox(
                label="Course/Program Name",
                placeholder="e.g., Machine Learning Certification",
                lines=1
            )

            user_date = gr.Textbox(
                label="Issue Date",
                placeholder="e.g., 2024-01-15 or Jan 15, 2024",
                lines=1
            )

            user_issuer = gr.Textbox(
                label="Issuing Organization",
                placeholder="e.g., Stanford University",
                lines=1
            )

            validate_btn = gr.Button("🔍 Validate Certificate", variant="primary", size="lg")

            gr.Markdown(_TIPS_MD)

        # Right column: everything validate_certificate returns.
        with gr.Column(scale=1):
            gr.Markdown("## 📋 Validation Report")

            report_output = gr.Markdown(label="Analysis Report")

            score_output = gr.Number(
                label="Final Validation Score",
                precision=0
            )

            with gr.Accordion("📄 Extracted Text (OCR)", open=False):
                extracted_text_output = gr.Textbox(
                    label="Raw Extracted Text",
                    lines=10,
                    max_lines=20
                )

            with gr.Accordion("🔧 Technical Details (JSON)", open=False):
                json_output = gr.JSON(label="Detailed Results")

    # Connect button to function. The order of `outputs` must match the order
    # of the tuple validate_certificate returns: report, text, JSON, score.
    validate_btn.click(
        fn=validate_certificate,
        inputs=[image_input, user_name, user_course, user_date, user_issuer],
        outputs=[report_output, extracted_text_output, json_output, score_output]
    )

    gr.Markdown(_HOW_IT_WORKS_MD)

# Launch the app
if __name__ == "__main__":
    # share=True asks Gradio for a public link in addition to the local server.
    demo.launch(share=True)