Spaces:
Sleeping
Sleeping
"""
DTD DocTamper - Gradio Application
Document Tampering Detection using DTD model
"""
import os
import tempfile

import gradio as gr
import numpy as np

from inference import DTDPredictor
# Create the module-level predictor once at import time; predict_tampering()
# reuses this single instance for every call.
print("Loading DTD model...")
predictor = DTDPredictor(
    checkpoint_path='checkpoints/dtd_doctamper.pth', device='auto'
)
print("Model loaded!")
def predict_tampering(image, quality=90):
    """
    Predict document tampering.

    The input is written to a temporary JPEG file (quality 95) whose path is
    handed to ``predictor.predict``. The temporary file is removed afterwards,
    whether saving or prediction succeeds or fails.

    Args:
        image: Input image (PIL Image or numpy array). ``None`` (nothing
            uploaded) is accepted and produces an empty result.
        quality: JPEG compression quality for DCT analysis; forwarded
            unchanged to ``predictor.predict``.

    Returns:
        Tuple of (original, mask, heatmap) taken from the predictor's result
        dict, or ``(None, None, None)`` when ``image`` is ``None``.
    """
    # Nothing was uploaded: return empty outputs instead of crashing inside
    # the image conversion below.
    if image is None:
        return None, None, None

    if not hasattr(image, 'save'):
        # Deferred import: Pillow is only needed here for array input.
        from PIL import Image
        image = Image.fromarray(image)

    # Pillow's JPEG writer rejects modes such as RGBA, LA or P (typical for
    # PNG uploads) with OSError. Modes it can already write are left alone so
    # the encoded file is unchanged for them.
    jpeg_writable_modes = ('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr')
    if getattr(image, 'mode', 'RGB') not in jpeg_writable_modes:
        image = image.convert('RGB')

    # Reserve a unique path and close the handle right away; delete=False
    # keeps the file on disk so it can be reopened by path.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as tmp:
        tmp_path = tmp.name

    try:
        # Saving happens inside the try so a failed save cannot leak the file.
        image.save(tmp_path, 'JPEG', quality=95)
        result = predictor.predict(tmp_path, quality=quality)
        return result['original'], result['mask'], result['heatmap']
    finally:
        os.unlink(tmp_path)
# Markdown shown above the controls. Kept flush-left at module level so the
# rendered text does not depend on source indentation; blank lines separate
# paragraphs, section labels and lists so each renders as its own block.
_INTRO_MARKDOWN = """
# 🔍 DTD: Document Tampering Detection

Upload a document image to detect forged or tampered regions using the DTD (Document Tampering Detector) model.

**How it works:**

- The model analyzes JPEG compression artifacts (DCT coefficients)
- Red regions indicate potential tampering
- Works best on JPEG images of documents

**Paper:** [Towards Robust Tampered Text Detection in Document Image](https://openaccess.thecvf.com/content/CVPR2023/papers/Qu_Towards_Robust_Tampered_Text_Detection_in_Document_Image_New_Dataset_CVPR_2023_paper.pdf)
"""

# Markdown shown below the controls (about / citation / limitations).
_ABOUT_MARKDOWN = """
---

### ℹ️ About

**DTD (Document Tampering Detector)** is a deep learning model designed to detect forged text in document images.

**Features:**

- Analyzes JPEG compression artifacts using DCT (Discrete Cosine Transform)
- Detects copy-paste, splicing, and text manipulation
- Works on scanned documents, photos of documents, and digital documents

**Citation:**

```bibtex
@inproceedings{qu2023towards,
title={Towards Robust Tampered Text Detection in Document Image: New Dataset and New Solution},
author={Qu, Chenfan and Liu, Chongyu and Liu, Yuliang and Chen, Xinhong and Peng, Dezhi and Guo, Fengjun and Jin, Lianwen},
booktitle={CVPR},
year={2023}
}
```

**Model Architecture:**

- Backbone: VPH (Vision Pyramid Hybrid) + Swin Transformer
- Decoder: Multi-scale Iterative Decoder (MID)
- Input: RGB image + DCT coefficients + Quantization tables
- Output: Binary segmentation mask

**Limitations:**

- Requires JPEG images for DCT analysis
- May produce false positives on low-quality scans
- Performance varies with JPEG compression quality
"""

# Create Gradio interface
with gr.Blocks(title="DTD Document Tampering Detection") as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    with gr.Row():
        # Left column: upload, quality setting and the trigger button.
        with gr.Column():
            input_image = gr.Image(
                label="Upload Document Image",
                type="pil",
            )
            quality_slider = gr.Slider(
                minimum=75,
                maximum=95,
                value=90,
                step=5,
                label="JPEG Quality for DCT Analysis",
                info="Higher quality = more sensitive detection",
            )
            submit_btn = gr.Button("Detect Tampering", variant="primary")

        # Right column: one tab per result image.
        with gr.Column():
            with gr.Tab("Heatmap Overlay"):
                output_heatmap = gr.Image(label="Tampering Heatmap")
            with gr.Tab("Binary Mask"):
                output_mask = gr.Image(label="Tampering Mask")
            with gr.Tab("Original"):
                output_original = gr.Image(label="Original Image")

    # Defined once and shared by the examples and the click handler. The
    # output order must match the tuple returned by predict_tampering:
    # (original, mask, heatmap).
    detect_inputs = [input_image, quality_slider]
    detect_outputs = [output_original, output_mask, output_heatmap]

    # Examples
    gr.Examples(
        examples=[
            ["examples/carte.jpeg", 90],
            ["examples/TamperedPaystub.jpg", 90],
            ["examples/Paystub.jpg", 90],
        ],
        inputs=detect_inputs,
        outputs=detect_outputs,
        fn=predict_tampering,
        cache_examples=False,
    )

    # Event handlers
    submit_btn.click(
        fn=predict_tampering,
        inputs=detect_inputs,
        outputs=detect_outputs,
    )

    gr.Markdown(_ABOUT_MARKDOWN)
if __name__ == "__main__":
    # Start the app only when run as a script: bind to 0.0.0.0 on port 7860
    # with share disabled.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo.launch(**launch_options)