Yaz Hobooti
Increase PDF resolution: DPI from 300 to 600, scaling factors improved for better OCR and barcode detection
e7a28e8
| import os | |
| import uuid | |
| import json | |
| from flask import Flask, request, render_template, jsonify, send_file | |
| from werkzeug.utils import secure_filename | |
| from pdf_comparator import PDFComparator | |
| import tempfile | |
| import shutil | |
# Application object, with the upload size limit and the two working
# directories configured in a single call.
app = Flask(__name__)
app.config.update(
    MAX_CONTENT_LENGTH=16 * 1024 * 1024,  # 16MB max file size
    UPLOAD_FOLDER='uploads',
    RESULTS_FOLDER='results',
)

# Both folders must exist before any request handler writes into them;
# exist_ok=True makes repeated start-ups harmless.
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
os.makedirs(app.config['RESULTS_FOLDER'], exist_ok=True)
# File extensions (lower-case, without the dot) accepted for upload.
ALLOWED_EXTENSIONS = {'pdf'}


def allowed_file(filename):
    """Return True when *filename* ends in an allowed extension.

    The extension is whatever follows the last dot, compared
    case-insensitively against ``ALLOWED_EXTENSIONS``. A name without any
    dot is rejected.
    """
    _stem, dot, extension = filename.rpartition('.')
    if not dot:
        # No dot at all, so there is no extension to check.
        return False
    return extension.lower() in ALLOWED_EXTENSIONS
def index():
    """Render and return the ``index.html`` template.

    NOTE(review): no route decorator is visible in this view of the file;
    confirm how this handler is registered with the application.
    """
    page = render_template('index.html')
    return page
def upload_files():
    """Store two uploaded PDFs, compare them, and return the result as JSON.

    Reads the ``pdf1`` and ``pdf2`` file parts of the current request, saves
    both into a fresh per-request directory under ``UPLOAD_FOLDER``, runs
    ``PDFComparator.compare_pdfs`` on the saved paths, and writes the result
    to ``<RESULTS_FOLDER>/<session_id>_results.json``.

    Returns:
        On success, a JSON response with ``success``, ``session_id`` and
        ``results``. A ``400`` JSON error when either part is missing, has
        an empty filename, or is not a PDF by extension. A ``500`` JSON
        error when the comparison or the results write raises.

    NOTE(review): no route decorator is visible in this view of the file;
    confirm how this handler is registered with the application.
    """
    if 'pdf1' not in request.files or 'pdf2' not in request.files:
        return jsonify({'error': 'Both PDF files are required'}), 400

    pdf1 = request.files['pdf1']
    pdf2 = request.files['pdf2']

    if pdf1.filename == '' or pdf2.filename == '':
        return jsonify({'error': 'Both PDF files are required'}), 400

    if not (allowed_file(pdf1.filename) and allowed_file(pdf2.filename)):
        return jsonify({'error': 'Only PDF files are allowed'}), 400

    # Create unique session directory
    session_id = str(uuid.uuid4())
    session_dir = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
    os.makedirs(session_dir, exist_ok=True)

    # Sanitised names can coincide (identical client names, names differing
    # only by case on a case-insensitive filesystem, or non-ASCII names that
    # collapse to the same text). Without disambiguation the second save
    # would overwrite the first and the PDF would be compared with itself.
    name1 = secure_filename(pdf1.filename)
    name2 = secure_filename(pdf2.filename)
    if name1.casefold() == name2.casefold():
        name1 = f'1_{name1}'
        name2 = f'2_{name2}'

    # Save uploaded files
    pdf1_path = os.path.join(session_dir, name1)
    pdf2_path = os.path.join(session_dir, name2)
    pdf1.save(pdf1_path)
    pdf2.save(pdf2_path)

    try:
        comparator = PDFComparator()
        results = comparator.compare_pdfs(pdf1_path, pdf2_path, session_id)

        # Serialise before opening the file so a non-serialisable result
        # cannot leave a truncated results file behind.
        serialized = json.dumps(results, indent=2)
        results_path = os.path.join(
            app.config['RESULTS_FOLDER'], f'{session_id}_results.json'
        )
        with open(results_path, 'w') as f:
            f.write(serialized)

        return jsonify({
            'success': True,
            'session_id': session_id,
            'results': results
        })
    except Exception as e:
        # Request boundary: record the traceback server-side, then drop the
        # uploads. The session id is never returned on failure, so nothing
        # can refer to this directory afterwards.
        app.logger.exception('PDF comparison failed for session %s', session_id)
        shutil.rmtree(session_dir, ignore_errors=True)
        return jsonify({'error': str(e)}), 500
def get_results(session_id):
    """Return the stored comparison results for *session_id* as JSON.

    Looks for ``<RESULTS_FOLDER>/<session_id>_results.json``.

    Args:
        session_id: Identifier under which the results file was written.

    Returns:
        The parsed results as a JSON response, or a ``404`` JSON error when
        no results file exists for the identifier.

    NOTE(review): no route decorator is visible in this view of the file;
    confirm how this handler is registered with the application.
    """
    results_root = os.path.realpath(app.config['RESULTS_FOLDER'])
    results_path = os.path.realpath(
        os.path.join(results_root, f'{session_id}_results.json')
    )

    # The identifier comes from the client; refuse anything that would
    # resolve to a path outside the results folder.
    if not results_path.startswith(results_root + os.sep):
        return jsonify({'error': 'Results not found'}), 404

    # Open directly instead of checking existence first: the file may vanish
    # between a check and the open, which should still be a 404, not a 500.
    try:
        with open(results_path, 'r') as f:
            results = json.load(f)
    except (FileNotFoundError, IsADirectoryError, NotADirectoryError):
        return jsonify({'error': 'Results not found'}), 404

    return jsonify(results)
def download_file(session_id, filename):
    """Send a file from a session's upload directory as an attachment.

    Args:
        session_id: Name of the session directory under ``UPLOAD_FOLDER``.
        filename: Name of the file inside that directory.

    Returns:
        The file as an attachment, or a ``404`` JSON error when the path
        does not resolve to a regular file inside ``UPLOAD_FOLDER``.

    NOTE(review): no route decorator is visible in this view of the file;
    confirm how this handler is registered with the application.
    """
    upload_root = os.path.realpath(app.config['UPLOAD_FOLDER'])
    file_path = os.path.realpath(
        os.path.join(upload_root, session_id, filename)
    )

    # Both path components are client-supplied. Resolve the final path and
    # require it to stay inside the upload folder so values such as ".." or
    # an absolute path cannot expose other files on the server.
    inside_upload_root = file_path.startswith(upload_root + os.sep)

    # isfile (rather than exists) also turns a directory into a 404 instead
    # of an error raised while sending.
    if not inside_upload_root or not os.path.isfile(file_path):
        return jsonify({'error': 'File not found'}), 404

    # Pass the absolute path so the file that was checked is the file that
    # is sent, regardless of how relative paths would be resolved.
    return send_file(file_path, as_attachment=True)
if __name__ == '__main__':
    # The interactive debugger lets anyone who can reach the server run
    # arbitrary code, so it must not be on unconditionally while listening
    # on all interfaces. Opt in explicitly with FLASK_DEBUG=1 for local work.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in {
        '1', 'true', 'yes', 'on',
    }
    app.run(debug=debug_enabled, host='0.0.0.0', port=5000)