|
|
import os
|
|
|
import base64
|
|
|
import tempfile
|
|
|
import json
|
|
|
from flask import Blueprint, request, jsonify, current_app
|
|
|
from werkzeug.utils import secure_filename
|
|
|
import fitz
|
|
|
import PyPDF2
|
|
|
from PIL import Image
|
|
|
from io import BytesIO
|
|
|
import cv2
|
|
|
import numpy as np
|
|
|
|
|
|
|
|
|
# Pix2Text is treated as an optional dependency: ``p2t`` starts out as None and
# is only replaced when both the import and the model construction succeed.
# Code that uses ``p2t`` must therefore check it before calling into it.
p2t = None
try:
    from pix2text import Pix2Text

    p2t = Pix2Text(analyzer_config={'model_name': 'mfd'})
except Exception as init_error:
    print(f"Warning: Could not initialize Pix2Text: {init_error}")
|
|
|
|
|
|
|
|
|
from controller.pix2text_controller import preprocess_image as preprocess_math_image
|
|
|
from controller.scribble_controller import preprocess_image as preprocess_scribble_image
|
|
|
from controller.pdf_controller import extract_text_from_pdf
|
|
|
from controller.table_controller import detect_table, generate_latex_table
|
|
|
|
|
|
# Blueprint carrying every route of this module; routes are served under /chatbot.
chatbot_bp = Blueprint('chatbot_bp', __name__, url_prefix='/chatbot')

# Storage locations (relative to the process working directory).
UPLOAD_FOLDER = 'static/uploads'
PROCESSED_FOLDER = 'static/processed'

# Make sure both folders exist as soon as the module is imported.
for _folder in (UPLOAD_FOLDER, PROCESSED_FOLDER):
    os.makedirs(_folder, exist_ok=True)
|
|
|
|
|
|
def allowed_file(filename):
    """Tell whether *filename* ends in one of the accepted upload extensions.

    The text after the last dot is compared case-insensitively against
    png / jpg / jpeg / pdf.  A name without any dot is never accepted.
    """
    permitted = {'png', 'jpg', 'jpeg', 'pdf'}
    # rpartition yields an empty separator when the name contains no dot.
    _, separator, suffix = filename.rpartition('.')
    return separator == '.' and suffix.lower() in permitted
|
|
|
|
|
|
def is_table_image(image_path):
    """Return whether ``detect_table`` finds at least a 2x2 grid in the image.

    Any exception raised while detecting (or comparing the counts) is printed
    and reported as "not a table" instead of being propagated.
    """
    try:
        row_count, col_count = detect_table(image_path)
        # Fewer than two rows or two columns is not considered a table.
        return row_count >= 2 and col_count >= 2
    except Exception as err:
        print(f"Error detecting table: {err}")
        return False
|
|
|
|
|
|
def process_table_image(image_path):
    """Build a LaTeX table for the grid that ``detect_table`` finds in the image.

    Returns the output of ``generate_latex_table(rows, cols)`` when both counts
    are positive, a ``\\text{...}`` notice when no structure was detected, and
    a ``\\text{...}`` error message when anything raises.
    """
    try:
        row_count, col_count = detect_table(image_path)

        # Guard clause: without a positive row and column count there is
        # nothing to generate.
        if not (row_count > 0 and col_count > 0):
            return "\\text{Could not detect table structure}"

        return generate_latex_table(row_count, col_count)
    except Exception as err:
        print(f"Error processing table image: {err}")
        return f"\\text{{Error processing table: {str(err)}}}"
|
|
|
|
|
|
def _latex_from_recognition(result):
    """Normalise a Pix2Text recognition result into a single LaTeX string.

    Handles the result shapes this module has to cope with:

    * ``dict``  -> its ``'text'`` entry (missing/None becomes ``''``);
    * ``list`` whose first item is a dict -> the non-empty ``'text'`` entries
      of all dict items, joined with newlines, so that no recognised box is
      dropped;
    * empty ``list`` -> ``''`` (nothing recognised, rather than the literal
      string ``"[]"``);
    * anything else -> ``str(result)``.
    """
    if isinstance(result, dict):
        text = result.get('text')
        return '' if text is None else str(text)

    if isinstance(result, list):
        if not result:
            return ''
        if isinstance(result[0], dict):
            texts = [
                str(item['text'])
                for item in result
                if isinstance(item, dict) and item.get('text')
            ]
            return '\n'.join(texts)

    return str(result)


def process_math_image(image_path):
    """Process math image and convert to LaTeX using Pix2Text.

    The image is recognised as-is first.  When that yields fewer than two
    non-blank characters, it is run through ``preprocess_math_image`` and
    recognised once more; the second result is returned whatever its length.

    Args:
        image_path: Path of the image file to recognise.

    Returns:
        The recognised LaTeX string, ``\\text{Pix2Text not available}`` when
        the module-level ``p2t`` recogniser could not be initialised, or a
        ``\\text{Error processing math image: ...}`` message when any step
        raises.  This function never raises.
    """
    try:
        if not p2t:
            return "\\text{Pix2Text not available}"

        latex_code = _latex_from_recognition(p2t.recognize(image_path))

        # A near-empty answer usually means recognition failed; retry once on
        # the preprocessed version of the image.
        if len(latex_code.strip()) < 2:
            print("Result too short, trying with preprocessing...")
            processed_path = preprocess_math_image(image_path)
            latex_code = _latex_from_recognition(p2t.recognize(processed_path))

        return latex_code
    except Exception as e:
        print(f"Error processing math image: {e}")
        return f"\\text{{Error processing math image: {str(e)}}}"
|
|
|
|
|
|
def process_image_for_latex(image_path):
    """Convert an image to LaTeX, routing it to the table or the math pipeline.

    ``is_table_image`` decides which converter runs.  If anything in that
    flow raises, the error is printed and the image is handed to
    ``process_math_image`` as a fallback.
    """
    try:
        if is_table_image(image_path):
            print("Detected table image, processing as table...")
            converter = process_table_image
        else:
            print("Processing as math equation...")
            converter = process_math_image
        return converter(image_path)
    except Exception as err:
        print(f"Error processing image: {err}")
        # Last resort: treat the image as a math equation.
        return process_math_image(image_path)
|
|
|
|
|
|
# Unicode math symbols / Greek letters and the LaTeX command that replaces them.
_PDF_MATH_SYMBOLS = {
    '∑': '\\sum',
    '∏': '\\prod',
    '∫': '\\int',
    '∞': '\\infty',
    '±': '\\pm',
    '×': '\\times',
    '÷': '\\div',
    '≤': '\\leq',
    '≥': '\\geq',
    '≠': '\\neq',
    '≈': '\\approx',
    '√': '\\sqrt',
    'α': '\\alpha',
    'β': '\\beta',
    'γ': '\\gamma',
    'δ': '\\delta',
    'ε': '\\epsilon',
    'θ': '\\theta',
    'λ': '\\lambda',
    'μ': '\\mu',
    'π': '\\pi',
    'σ': '\\sigma',
    'φ': '\\phi',
    'ω': '\\omega',
}

# LaTeX-reserved characters found in the extracted text and their escaped form.
# Backslashes are deliberately absent: they pass through unchanged.  ``\^{}``
# (not ``\^``) is used for the caret because a bare ``\^`` is an accent command
# that would swallow the following character.
_PDF_LATEX_ESCAPES = {
    '_': '\\_',
    '^': '\\^{}',
    '&': '\\&',
    '%': '\\%',
    '$': '\\$',
    '#': '\\#',
    '{': '\\{',
    '}': '\\}',
}


def _plain_text_to_latex(text):
    """Turn plain extracted text into a LaTeX-flavoured string.

    The order of the steps matters:

    1. Reserved characters of the *source text* are escaped first, so the
       ``_``, ``^``, ``{`` and ``}`` generated by the later steps stay
       functional instead of being escaped themselves.
    2. ``<digits>^<digits>`` is restored as a real superscript.
    3. ``<digits>x<digits>`` becomes a ``\\times`` product.  This has to run
       before the subscript rule, which would otherwise claim the ``x<digits>``.
    4. A letter directly followed by digits becomes a subscript.
    5. ``<digits>/<digits>`` becomes ``\\frac``.
    6. Unicode symbols are replaced last, so the subscript rule never rewrites
       a generated command name; a space is added when a letter follows so the
       command does not fuse with it (``αx`` -> ``\\alpha x``).
    """
    import re  # local import, as `re` is not imported at module level here

    latex = text.translate(str.maketrans(_PDF_LATEX_ESCAPES))

    # 2^10 was escaped to 2\^{}10 above; turn it into a genuine superscript.
    latex = re.sub(r'(\d+)\\\^\{\}(\d+)', r'\1^{\2}', latex)

    # The lookahead leaves the right operand unconsumed so chains such as
    # 2x3x4 are converted completely.
    latex = re.sub(r'(\d+)x(?=\d)', r'\1 \\times ', latex)

    # Subscripts: x2 -> x_{2}.  Greek letters from the symbol table count as
    # letters here; braces keep multi-digit subscripts together.
    greek_letters = ''.join(ch for ch in _PDF_MATH_SYMBOLS if ch.isalpha())
    latex = re.sub(
        r'([a-zA-Z' + greek_letters + r'])(\d+)', r'\1_{\2}', latex
    )

    latex = re.sub(r'(\d+)/(\d+)', r'\\frac{\1}{\2}', latex)

    # Symbols followed by an ASCII letter need a separating space ...
    symbol_class = '[' + re.escape(''.join(_PDF_MATH_SYMBOLS)) + ']'
    latex = re.sub(
        symbol_class + r'(?=[A-Za-z])',
        lambda match: _PDF_MATH_SYMBOLS[match.group()] + ' ',
        latex,
    )
    # ... all remaining ones are replaced one-to-one.
    return latex.translate(str.maketrans(_PDF_MATH_SYMBOLS))


def process_pdf_for_latex(pdf_path):
    """Process PDF and extract LaTeX with better math equation handling.

    The text returned by ``extract_text_from_pdf`` is escaped for LaTeX and a
    few plain-text math conventions (Unicode symbols, ``a/b`` fractions,
    ``x2`` subscripts, ``2x3`` products, ``2^3`` powers) are rewritten as
    LaTeX markup.  The rewrites are heuristics and produce math-mode markup
    (``_``, ``^``, ``\\frac``) without adding math delimiters.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The converted string, or ``\\text{Error processing PDF: ...}`` when
        extraction or conversion raises.  This function never raises.
    """
    try:
        text = extract_text_from_pdf(pdf_path)
        return _plain_text_to_latex(text)
    except Exception as e:
        print(f"Error processing PDF: {e}")
        return f"\\text{{Error processing PDF: {str(e)}}}"
|
|
|
|
|
|
# Canned answers for text messages, checked in order against the lower-cased
# message; the first pattern that is found wins.  The short keywords (frac,
# int, sum, lim, table) are anchored with ``(?<![a-z])`` / ``(?![a-z])`` so
# they match as words or command names ("\int", "sum_{i}") but not inside
# unrelated words such as "print", "summary", "eliminate" or "suitable".
_CHAT_HELP_TOPICS = (
    (
        r'(?<![a-z])frac',
        'To write a fraction in LaTeX, use \\frac{numerator}{denominator}. For example: \\frac{1}{2} produces ½.',
    ),
    (
        r'(?<![a-z])(?:integra|int(?![a-z]))',
        'To write an integral in LaTeX, use \\int. For example: \\int_0^1 x^2 dx. For definite integrals, specify limits with _ and ^.',
    ),
    (
        r'(?<![a-z])(?:sums?(?![a-z])|summation|sigma)',
        'To write a summation in LaTeX, use \\sum. For example: \\sum_{i=1}^{n} i. Use _ for lower limit and ^ for upper limit.',
    ),
    (
        r'(?<![a-z])lim(?:it|(?![a-z]))',
        'To write a limit in LaTeX, use \\lim. For example: \\lim_{x \\to 0} \\frac{\\sin x}{x} = 1.',
    ),
    (
        r'matri(?:x|ces)|array',
        'To create a matrix in LaTeX, use \\begin{matrix} ... \\end{matrix}. For example:\n\\begin{matrix}\na & b \\\\\nc & d\n\\end{matrix}',
    ),
    (
        r'(?<![a-z])tab(?:le|ular)',
        'To create a table in LaTeX, use the tabular environment. For example:\n\\begin{tabular}{|c|c|}\n\\hline\nColumn 1 & Column 2 \\\\\n\\hline\nItem 1 & Item 2 \\\\\n\\hline\n\\end{tabular}',
    ),
    (
        r'equation|align',
        'To write equations in LaTeX, you can use:\n- Inline: $E = mc^2$\n- Display: $$E = mc^2$$\n- Aligned: \\begin{align} x &= y \\\\ y &= z \\end{align}',
    ),
    (
        r'help|texlab',
        "I'm the TexLab Assistant! I can help you with:\n- LaTeX syntax and commands\n- Mathematical notation\n- Document conversion tips\n- Using TexLab features\n\nJust ask me any LaTeX or math question!",
    ),
    (
        r'image|picture|photo',
        "You can upload images of math equations or tables by clicking the 'Image' button in the chat interface. I'll convert them to LaTeX code for you!",
    ),
    (
        r'pdf|document',
        "You can upload PDF files by clicking the 'PDF' button in the chat interface. I'll extract the content and convert it to LaTeX format!",
    ),
)

# Answer used when no topic pattern matches.
_CHAT_DEFAULT_HELP = "I'm the TexLab Assistant. I can help you with LaTeX syntax, mathematical notation, and document conversion.\n\n✨ New Features:\n• Upload images of math equations or tables for LaTeX conversion\n• Upload PDF files for content extraction\n\nTry asking me something like 'How do I write a fraction in LaTeX?' or 'How do I create a matrix?', or upload an image/PDF file!"

# Extensions routed to the image pipeline; '.pdf' goes to the PDF pipeline.
_CHAT_IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')


def _chat_failure(text, status):
    """Build the ``{'success': False, 'response': text}`` JSON reply with *status*."""
    return jsonify({'success': False, 'response': text}), status


def _answer_latex_question(message):
    """Return the canned help text of the first topic matching *message*."""
    import re  # local import, as `re` is not imported at module level here

    lowered = message.lower()
    for pattern, answer in _CHAT_HELP_TOPICS:
        if re.search(pattern, lowered):
            return answer
    return _CHAT_DEFAULT_HELP


def _convert_upload_to_latex(file):
    """Store an uploaded file under a unique name and convert it to LaTeX.

    *file* must already have passed ``allowed_file`` (so its name contains a
    dot).  The extension is taken from the original filename and validated
    *before* anything is written to disk; ``secure_filename`` is only used for
    the stem, because it can strip a name down to nothing but its extension.
    The stored extension is lower-cased.
    """
    extension = '.' + file.filename.rsplit('.', 1)[1].lower()
    if extension in _CHAT_IMAGE_EXTENSIONS:
        file_type = 'image'
    elif extension == '.pdf':
        file_type = 'pdf'
    else:
        return _chat_failure(
            "Unsupported file type. Please upload an image (PNG, JPG, JPEG) or PDF file.",
            400,
        )

    stem = os.path.splitext(secure_filename(file.filename))[0] or 'upload'
    # Random suffix so simultaneous uploads of the same name cannot collide.
    unique_filename = f"{stem}_{os.urandom(8).hex()}{extension}"
    filepath = os.path.join(UPLOAD_FOLDER, unique_filename)
    file.save(filepath)

    if file_type == 'image':
        latex_result = process_image_for_latex(filepath)
        # The table pipeline is recognised by the tabular environment it emits.
        file_type_desc = "table" if "begin{tabular}" in latex_result else "math equation"
        response = f"I've processed your image as a {file_type_desc} and converted it to LaTeX:\n\n```\n{latex_result}\n```\n\nYou can copy this LaTeX code and use it in your documents."
    else:
        latex_result = process_pdf_for_latex(filepath)
        response = f"I've processed your PDF and extracted the mathematical content in LaTeX format:\n\n```\n{latex_result}\n```\n\nYou can copy this LaTeX code and use it in your documents."

    return jsonify({
        'success': True,
        'response': response,
        'latex': latex_result,
        'file_type': file_type,
    })


@chatbot_bp.route('/chat', methods=['POST'])
def chat():
    """Enhanced chat endpoint that handles text, image, and PDF inputs.

    Request forms:
        * multipart upload with an ``image`` or ``pdf`` file field: the file
          is saved in ``UPLOAD_FOLDER`` and converted to LaTeX;
        * JSON body ``{"message": "..."}``: answered with a canned LaTeX tip.

    Returns:
        A JSON object with ``success`` and ``response`` (plus ``latex`` and
        ``file_type`` for uploads).  Status 400 is used for missing, empty or
        unsupported input, 500 when processing fails.
    """
    try:
        if 'image' in request.files or 'pdf' in request.files:
            file = request.files.get('image') or request.files.get('pdf')

            # A file field submitted without a file must still get a reply;
            # falling through here would make the view return None.
            if not (file and file.filename):
                return _chat_failure(
                    "Please provide a message, image, or PDF file to process.", 400
                )

            if not allowed_file(file.filename):
                return _chat_failure(
                    "Invalid file type. Please upload an image (PNG, JPG, JPEG) or PDF file.",
                    400,
                )

            try:
                return _convert_upload_to_latex(file)
            except Exception as e:
                return _chat_failure(f"Error processing file: {str(e)}", 500)

        if request.is_json:
            # silent=True: an unparsable body is handled as "no message"
            # (400 below) instead of surfacing as a generic 500.
            data = request.get_json(silent=True)
            message = data.get('message', '') if isinstance(data, dict) else ''
            if isinstance(message, str) and message:
                return jsonify({
                    'success': True,
                    'response': _answer_latex_question(message),
                })

        return _chat_failure(
            "Please provide a message, image, or PDF file to process.", 400
        )
    except Exception as e:
        print(f"Error in chat endpoint: {e}")
        return _chat_failure(
            "Sorry, I encountered an error processing your request. Please try again.",
            500,
        )