Yaz Hobooti committed on
Commit
9a42fa7
·
1 Parent(s): d4324ba

Set up HF Space: Gradio app, deps (gradio), apt packages

Browse files
Files changed (3) hide show
  1. app.py +3 -96
  2. apt.txt +3 -0
  3. requirements.txt +1 -0
app.py CHANGED
@@ -1,97 +1,4 @@
1
- import os
2
- import uuid
3
- import json
4
- from flask import Flask, request, render_template, jsonify, send_file
5
- from werkzeug.utils import secure_filename
6
- from pdf_comparator import PDFComparator
7
- import tempfile
8
- import shutil
9
 
10
- app = Flask(__name__)
11
- app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 # 16MB max file size
12
- app.config['UPLOAD_FOLDER'] = 'uploads'
13
- app.config['RESULTS_FOLDER'] = 'results'
14
-
15
- # Ensure directories exist
16
- os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
17
- os.makedirs(app.config['RESULTS_FOLDER'], exist_ok=True)
18
-
19
- ALLOWED_EXTENSIONS = {'pdf'}
20
-
21
- def allowed_file(filename):
22
- return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
23
-
24
- @app.route('/')
25
- def index():
26
- return render_template('index.html')
27
-
28
- @app.route('/upload', methods=['POST'])
29
- def upload_files():
30
- if 'pdf1' not in request.files or 'pdf2' not in request.files:
31
- return jsonify({'error': 'Both PDF files are required'}), 400
32
-
33
- pdf1 = request.files['pdf1']
34
- pdf2 = request.files['pdf2']
35
-
36
- if pdf1.filename == '' or pdf2.filename == '':
37
- return jsonify({'error': 'Both PDF files are required'}), 400
38
-
39
- if not (allowed_file(pdf1.filename) and allowed_file(pdf2.filename)):
40
- return jsonify({'error': 'Only PDF files are allowed'}), 400
41
-
42
- # Create unique session directory
43
- session_id = str(uuid.uuid4())
44
- session_dir = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
45
- os.makedirs(session_dir, exist_ok=True)
46
-
47
- # Save uploaded files
48
- pdf1_path = os.path.join(session_dir, secure_filename(pdf1.filename))
49
- pdf2_path = os.path.join(session_dir, secure_filename(pdf2.filename))
50
-
51
- pdf1.save(pdf1_path)
52
- pdf2.save(pdf2_path)
53
-
54
- try:
55
- # Initialize PDF comparator
56
- comparator = PDFComparator()
57
-
58
- # Perform comparison
59
- results = comparator.compare_pdfs(pdf1_path, pdf2_path, session_id)
60
-
61
- # Save results
62
- results_path = os.path.join(app.config['RESULTS_FOLDER'], f'{session_id}_results.json')
63
- with open(results_path, 'w') as f:
64
- json.dump(results, f, indent=2)
65
-
66
- return jsonify({
67
- 'success': True,
68
- 'session_id': session_id,
69
- 'results': results
70
- })
71
-
72
- except Exception as e:
73
- return jsonify({'error': str(e)}), 500
74
-
75
- @app.route('/results/<session_id>')
76
- def get_results(session_id):
77
- results_path = os.path.join(app.config['RESULTS_FOLDER'], f'{session_id}_results.json')
78
-
79
- if not os.path.exists(results_path):
80
- return jsonify({'error': 'Results not found'}), 404
81
-
82
- with open(results_path, 'r') as f:
83
- results = json.load(f)
84
-
85
- return jsonify(results)
86
-
87
- @app.route('/download/<session_id>/<filename>')
88
- def download_file(session_id, filename):
89
- file_path = os.path.join(app.config['UPLOAD_FOLDER'], session_id, filename)
90
-
91
- if not os.path.exists(file_path):
92
- return jsonify({'error': 'File not found'}), 404
93
-
94
- return send_file(file_path, as_attachment=True)
95
-
96
- if __name__ == '__main__':
97
- app.run(debug=True, host='0.0.0.0', port=5000)
 
1
+ import gradio as gr
2
+ from pdf_comparator import create_demo
 
 
 
 
 
 
3
 
4
+ demo = create_demo()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
apt.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ poppler-utils
2
+ tesseract-ocr
3
+ libzbar0
requirements.txt CHANGED
@@ -14,3 +14,4 @@ matplotlib==3.7.2
14
  pandas==2.0.3
15
  reportlab==4.0.4
16
  regex==2023.10.3
 
 
14
  pandas==2.0.3
15
  reportlab==4.0.4
16
  regex==2023.10.3
17
+ gradio==4.44.0