|
|
import os
|
|
|
import json
|
|
|
import base64
|
|
|
import io
|
|
|
from flask import Blueprint, request, render_template, jsonify, current_app
|
|
|
import fitz
|
|
|
import PyPDF2
|
|
|
from PIL import Image
|
|
|
import tempfile
|
|
|
|
|
|
# Blueprint that groups this module's routes under the '/pdflly' URL prefix.
pdflly_bp = Blueprint('pdflly', __name__, url_prefix='/pdflly')
|
|
|
|
|
|
def extract_text_from_pdf(filepath, page_number=None, coordinates=None):
    """Extract text from a PDF with PyMuPDF, falling back to PyPDF2 on error.

    Args:
        filepath: Path of the PDF file to open.
        page_number: Index used as ``doc[page_number]`` to select one page,
            or ``None`` to concatenate the text of every page.
        coordinates: Optional ``(x, y, width, height)`` region. Only
            consulted when ``page_number`` is given; extraction is then
            clipped to the rectangle ``(x, y, x + width, y + height)``.

    Returns:
        The extracted text with leading/trailing whitespace stripped. If
        PyMuPDF raises for any reason, the result of
        ``extract_text_with_pypdf2`` is returned instead.
    """
    try:
        doc = fitz.open(filepath)
        try:
            if page_number is not None:
                page = doc[page_number]
                if coordinates:
                    x, y, width, height = coordinates
                    rect = fitz.Rect(x, y, x + width, y + height)
                    text = page.get_text("text", clip=rect)
                else:
                    text = page.get_text("text")
            else:
                # Join once instead of growing a string page by page.
                text = "".join(page.get_text("text") for page in doc)
        finally:
            # Always release the document handle, including when extraction
            # raises and control moves on to the PyPDF2 fallback.
            doc.close()
        return text.strip()
    except Exception as e:
        print(f"Error extracting text with PyMuPDF: {e}")
        # Best-effort fallback: retry the same request with PyPDF2.
        return extract_text_with_pypdf2(filepath, page_number, coordinates)
|
|
|
|
|
|
def extract_text_with_pypdf2(filepath, page_number=None, coordinates=None):
    """Extract text from a PDF using PyPDF2.

    ``coordinates`` is accepted but not used by this implementation. When
    ``page_number`` is ``None`` or is not below the number of pages, the text
    of every page is concatenated and returned; otherwise only that page's
    text is returned. Any exception is reported as an
    ``"Error extracting text: ..."`` string instead of being raised.
    """
    try:
        with open(filepath, 'rb') as pdf_stream:
            pdf = PyPDF2.PdfReader(pdf_stream)

            single_page_requested = (
                page_number is not None and page_number < len(pdf.pages)
            )
            if single_page_requested:
                return pdf.pages[page_number].extract_text()

            # No usable page index: fall through to the whole document.
            combined = ""
            for pdf_page in pdf.pages:
                combined += pdf_page.extract_text()
            return combined
    except Exception as exc:
        return f"Error extracting text: {str(exc)}"
|
|
|
|
|
|
# Replacement for each character LaTeX treats specially in running text.
# Applied in a single pass so the braces and backslashes introduced by one
# replacement are never escaped again by another.
_LATEX_ESCAPES = str.maketrans({
    '\\': r'\textbackslash{}',
    '^': r'\^{}',
    '~': r'\textasciitilde{}',
    '&': r'\&',
    '%': r'\%',
    '$': r'\$',
    '#': r'\#',
    '_': r'\_',
    '{': r'\{',
    '}': r'\}',
})


def convert_text_to_latex(text):
    """Escape plain text so it can be embedded in a LaTeX document.

    Args:
        text: The plain text to convert. ``None`` or an empty string yields
            an empty string.

    Returns:
        ``text`` with LaTeX's reserved characters (``\\ ^ ~ & % $ # _ { }``)
        replaced by their escaped equivalents. A backslash becomes
        ``\\textbackslash{}`` (``\\\\`` would be a line break) and a caret
        becomes ``\\^{}`` (a bare ``\\^`` is an accent applied to the next
        character).
    """
    if not text:
        return ""
    return text.translate(_LATEX_ESCAPES)
|
|
|
|
|
|
@pdflly_bp.route('/')
def pdflly_converter():
    """Serve the PDFly converter page rendered from ``pdflly.html``."""
    page = render_template('pdflly.html')
    return page
|
|
|
|
|
|
@pdflly_bp.route('/upload', methods=['POST'])
def upload_pdf():
    """Handle a PDF upload sent as the ``pdf_file`` multipart field.

    The file is saved into the system temp directory under its base name and
    opened with PyPDF2 to count its pages.

    Returns:
        A JSON response. On success it contains ``success``, ``filename``,
        ``filepath`` and ``pages``; on any failure it contains
        ``success: False`` and an ``error`` message.
    """
    if 'pdf_file' not in request.files:
        return jsonify({'success': False, 'error': 'No file provided'})

    file = request.files['pdf_file']

    # Covers both an empty string and a missing (None) filename.
    if not file.filename:
        return jsonify({'success': False, 'error': 'No file selected'})

    # The filename is client-controlled: keep only its final path component
    # (treating backslashes as separators too) so values such as
    # "../../x.pdf" or "/etc/x.pdf" cannot escape the temp directory.
    filename = os.path.basename(file.filename.replace('\\', '/'))

    if not filename.lower().endswith('.pdf'):
        return jsonify({'success': False, 'error': 'Invalid file type. Please upload a PDF file.'})

    try:
        filepath = os.path.join(tempfile.gettempdir(), filename)
        file.save(filepath)

        # Parsing the saved file both validates it and yields the page count.
        with open(filepath, 'rb') as f:
            page_count = len(PyPDF2.PdfReader(f).pages)
    except Exception as e:
        return jsonify({'success': False, 'error': f'Upload failed: {str(e)}'})

    return jsonify({
        'success': True,
        'filename': filename,
        'filepath': filepath,
        'pages': page_count
    })
|
|
|
|
|
|
@pdflly_bp.route('/process', methods=['POST'])
def process_pdf():
    """Extract text from a previously uploaded PDF and convert it to LaTeX.

    Expects a JSON object with:
        filename: Name of a file in the system temp directory (required).
        coordinates: Optional region passed through to
            ``extract_text_from_pdf``.
        page: Page index to read (defaults to ``0``).
        convert_all: When truthy, the whole document is extracted and
            ``page``/``coordinates`` are ignored.

    Returns:
        A JSON response with ``success``, ``latex`` and ``text`` on success,
        or ``success: False`` and an ``error`` message otherwise.
    """
    try:
        # silent=True yields None instead of raising on a missing or
        # malformed JSON body, so a clear error can be reported.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'success': False, 'error': 'Invalid or missing JSON body'})

        filename = data.get('filename')
        coordinates = data.get('coordinates')
        page = data.get('page', 0)
        convert_all = data.get('convert_all', False)

        if not filename or not isinstance(filename, str):
            return jsonify({'success': False, 'error': 'No filename provided'})

        # The filename is client-controlled: keep only its final path
        # component so absolute paths or "../" segments cannot point the
        # extractor at files outside the temp directory.
        safe_name = os.path.basename(filename.replace('\\', '/'))
        filepath = os.path.join(tempfile.gettempdir(), safe_name)

        # isfile (not exists) also rejects "", "." and "..", which would
        # otherwise resolve to an existing directory.
        if not os.path.isfile(filepath):
            return jsonify({'success': False, 'error': 'File not found'})

        if convert_all:
            text = extract_text_from_pdf(filepath)
        elif coordinates:
            text = extract_text_from_pdf(filepath, page, coordinates)
        else:
            text = extract_text_from_pdf(filepath, page)

        latex = convert_text_to_latex(text)

        return jsonify({
            'success': True,
            'latex': latex,
            'text': text
        })
    except Exception as e:
        return jsonify({'success': False, 'error': f'Processing failed: {str(e)}'})
|
|
|
|
|
|
@pdflly_bp.route('/solve', methods=['POST'])
def solve_equation():
    """Return a placeholder solution for the LaTeX sent in the JSON body.

    No solving is performed: the ``latex`` field (default ``''``) is echoed
    back prefixed with "Simplified: ". Any exception is reported as a JSON
    error response.
    """
    try:
        payload = request.get_json()
        expression = payload.get('latex', '')

        return jsonify({
            'success': True,
            'solution': {
                'type': 'expression',
                'result': f"Simplified: {expression}"
            }
        })
    except Exception as exc:
        return jsonify({'success': False, 'error': f'Solving failed: {str(exc)}'})