Spaces:

aliroohan179
/

aliroohan

Sleeping

App Files Files Community

aliroohan / app.py

aliroohan179

Updated app.py

0791c42 7 months ago

raw

history blame contribute delete

13.9 kB

	from flask import Flask, request, jsonify
	from flask_cors import CORS
	import os
	import io
	import base64
	from PIL import Image, ExifTags
	import pytesseract
	import cv2
	import numpy as np
	from datetime import datetime
	import hashlib
	from pdf2image import convert_from_path
	import tempfile
	from reportlab.pdfgen import canvas
	from reportlab.lib.colors import Color
	from reportlab.lib.pagesizes import letter
	import fitz # PyMuPDF

	# Flask application object; CORS(app) wraps it with flask_cors using that
	# extension's default settings (no explicit origin list is configured here).
	app = Flask(__name__)
	CORS(app)

	# Configure upload settings
	# UPLOAD_FOLDER: directory where uploads are written while being processed.
	# ALLOWED_EXTENSIONS: lower-case extensions accepted by allowed_file().
	# MAX_FILE_SIZE: upload limit in bytes; it is checked by the /extract handler
	# itself (no Flask MAX_CONTENT_LENGTH is set in the visible source).
	UPLOAD_FOLDER = '/tmp/uploads'
	ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif', 'bmp', 'tiff', 'webp', 'pdf'}
	MAX_FILE_SIZE = 16 * 1024 * 1024 # 16MB

	# Create uploads directory if it doesn't exist
	# (runs at import time; exist_ok=True makes repeated imports harmless)
	os.makedirs(UPLOAD_FOLDER, exist_ok=True)

	def allowed_file(filename):
	    """Return True when *filename* ends in one of ALLOWED_EXTENSIONS.

	    The extension is the text after the last dot, compared
	    case-insensitively. A name without any dot is rejected.
	    """
	    _, dot, extension = filename.rpartition('.')
	    if not dot:
	        return False
	    return extension.lower() in ALLOWED_EXTENSIONS

	def extract_text_from_image(image_path):
	    """Run Tesseract OCR on an image file.

	    Args:
	        image_path: Path of the image to read.

	    Returns:
	        A dict with 'raw_text' (the stripped full text), 'detailed_text'
	        (one entry per non-empty word whose confidence is above 30, each
	        with 'text', 'confidence' and a 'bbox' of x/y/width/height) and
	        'success'. If anything raises, 'success' is False, both text
	        fields are empty and 'error' holds the exception message.
	    """
	    try:
	        # Open the file once and let the context manager close it; both
	        # OCR passes run on the same image object.
	        with Image.open(image_path) as img:
	            text = pytesseract.image_to_string(img)
	            data = pytesseract.image_to_data(img, output_type=pytesseract.Output.DICT)

	        filtered_text = []
	        rows = zip(
	            data['text'], data['conf'],
	            data['left'], data['top'], data['width'], data['height'],
	        )
	        for word, conf, left, top, width, height in rows:
	            # Confidences may arrive as ints, floats or strings such as
	            # '96.06' depending on the Tesseract/pytesseract versions;
	            # int() alone rejects the string form, so go through float().
	            confidence = int(float(conf))
	            word = word.strip()
	            if confidence > 30 and word:
	                filtered_text.append({
	                    'text': word,
	                    'confidence': confidence,
	                    'bbox': {
	                        'x': left,
	                        'y': top,
	                        'width': width,
	                        'height': height,
	                    },
	                })

	        return {
	            'raw_text': text.strip(),
	            'detailed_text': filtered_text,
	            'success': True,
	        }
	    except Exception as e:
	        # Callers inspect 'success' instead of handling exceptions.
	        return {
	            'raw_text': '',
	            'detailed_text': [],
	            'success': False,
	            'error': str(e),
	        }

	def extract_image_metadata(image_path):
	    """Collect basic properties and EXIF tags of an image file.

	    Args:
	        image_path: Path of the image to inspect.

	    Returns:
	        A dict with 'format', 'mode', 'size' (width/height),
	        'has_transparency', 'exif' (tag name -> stringified value, empty
	        when the image exposes no EXIF data) and 'file_size' in bytes.
	        If anything raises, {'error': message} is returned instead.
	    """
	    try:
	        with Image.open(image_path) as img:
	            metadata = {
	                'format': img.format,
	                'mode': img.mode,
	                'size': {
	                    'width': img.width,
	                    'height': img.height,
	                },
	                'has_transparency': img.mode in ('RGBA', 'LA') or 'transparency' in img.info,
	            }

	            # Not every image class has _getexif; when it exists, call it
	            # once and reuse the result instead of parsing EXIF twice.
	            read_exif = getattr(img, '_getexif', None)
	            exif = read_exif() if read_exif is not None else None

	            exif_data = {}
	            if exif is not None:
	                for tag_id, value in exif.items():
	                    # Tags missing from ExifTags.TAGS fall back to their
	                    # numeric id; str() keeps every key a string so the
	                    # dict can be JSON-encoded with sorted keys (mixed
	                    # int/str keys cannot be ordered).
	                    tag = str(ExifTags.TAGS.get(tag_id, tag_id))
	                    exif_data[tag] = str(value)

	            metadata['exif'] = exif_data
	            metadata['file_size'] = os.path.getsize(image_path)

	            return metadata
	    except Exception as e:
	        return {'error': str(e)}

	# NOTE(review): no `def` line precedes this body in the visible source, yet
	# it reads `image_path` and uses `return`. Presumably the header of a
	# colour-analysis helper taking `image_path` was lost or removed; restore
	# the header or delete this section. Nothing else in the visible file calls
	# such a helper.
	#
	# What the statements below do: load the image, compute the per-channel
	# mean colour, cluster all pixels into 5 groups with k-means and return the
	# mean colour plus the 5 cluster centres (RGB, hex string, share of pixels
	# in percent) ordered from largest to smallest share. Any exception is
	# turned into an {'error': message} dict.
	"""Analyze dominant colors in the image."""
	try:
	# Load image with OpenCV and convert its BGR channel order to RGB
	img = cv2.imread(image_path)
	img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

	# Reshape image to be a list of pixels: one row per pixel, three values each
	pixels = img_rgb.reshape(-1, 3)

	# Calculate color statistics (mean of each channel, truncated to int)
	mean_color = np.mean(pixels, axis=0).astype(int).tolist()

	# Find dominant colors using k-means clustering
	# scikit-learn is imported here rather than at the top of the file, so it
	# is only required when this code actually runs.
	from sklearn.cluster import KMeans

	# Use 5 clusters to find 5 dominant colors
	# (random_state is fixed — presumably so repeated runs on the same image
	# give the same clusters)
	kmeans = KMeans(n_clusters=5, random_state=42, n_init=10)
	kmeans.fit(pixels)

	colors = kmeans.cluster_centers_.astype(int).tolist()

	# Calculate color percentages: share of pixels assigned to each cluster
	labels = kmeans.labels_
	percentages = []
	total_pixels = len(labels)

	# range(5) must stay in step with n_clusters above
	for i in range(5):
	percentage = (np.sum(labels == i) / total_pixels) * 100
	percentages.append(round(percentage, 2))

	# Combine colors with percentages (index i pairs a centre with its share)
	dominant_colors = [
	{
	'color': {'r': color[0], 'g': color[1], 'b': color[2]},
	'hex': f"#{color[0]:02x}{color[1]:02x}{color[2]:02x}",
	'percentage': percentages[i]
	}
	for i, color in enumerate(colors)
	]

	# Sort by percentage, largest share first
	dominant_colors.sort(key=lambda x: x['percentage'], reverse=True)

	return {
	'mean_color': {
	'r': mean_color[0],
	'g': mean_color[1],
	'b': mean_color[2]
	},
	'dominant_colors': dominant_colors
	}
	except Exception as e:
	# Failures are reported in the result instead of being raised
	return {'error': str(e)}


	def draw_text_boxes(image_path, text_data):
	    """Save a copy of an image with a green box around each detected word.

	    Args:
	        image_path: Path of the source image.
	        text_data: OCR result whose 'detailed_text' entries each carry a
	            'bbox' dict with 'x', 'y', 'width' and 'height'.

	    Returns:
	        Path of the annotated copy, named '<root>_annotated<ext>' next to
	        the source. If the copy cannot be produced the error is printed
	        and image_path itself is returned.
	    """
	    try:
	        img = cv2.imread(image_path)
	        # imread signals an unreadable file by returning None, not raising.
	        if img is None:
	            raise ValueError(f"could not read image: {image_path}")

	        for item in text_data['detailed_text']:
	            bbox = item['bbox']
	            top_left = (bbox['x'], bbox['y'])
	            bottom_right = (bbox['x'] + bbox['width'], bbox['y'] + bbox['height'])
	            # Green outline, 2 px thick.
	            cv2.rectangle(img, top_left, bottom_right, (0, 255, 0), 2)

	        # Insert the suffix in front of the extension only. A plain
	        # str.replace('.', ...) would rewrite every dot in the path, e.g.
	        # in 'my.photo.jpg' or in a directory name containing a dot.
	        root, ext = os.path.splitext(image_path)
	        annotated_path = f"{root}_annotated{ext}"

	        # imwrite reports some failures through its boolean return value.
	        if not cv2.imwrite(annotated_path, img):
	            raise ValueError(f"could not write image: {annotated_path}")
	        return annotated_path
	    except Exception as e:
	        print(f"Error drawing text boxes: {str(e)}")
	        return image_path

	def extract_text_from_pdf(pdf_path):
	    """OCR every page of a PDF by rendering the pages to images.

	    Args:
	        pdf_path: Path of the PDF file.

	    Returns:
	        A dict with 'raw_text' (page texts joined by blank lines, each
	        prefixed with a '--- Page N ---' header), 'detailed_text' (the
	        per-word entries of all pages, each tagged with a 1-based 'page'),
	        'success' and 'total_pages'. Pages whose OCR reports failure are
	        left out of both text fields. If anything raises, 'success' is
	        False, the text fields are empty and 'error' holds the message.
	    """
	    try:
	        images = convert_from_path(pdf_path)

	        all_text = []
	        all_detailed_text = []

	        for page_number, image in enumerate(images, start=1):
	            # Only reserve a unique name here; the handle is closed before
	            # the image is written to that path.
	            with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as temp_file:
	                temp_path = temp_file.name
	            try:
	                image.save(temp_path, 'PNG')
	                page_text = extract_text_from_image(temp_path)
	            finally:
	                # Remove the page image even when saving or OCR raised, so
	                # failed requests do not leave files behind.
	                os.unlink(temp_path)

	            if page_text['success']:
	                all_text.append(f"--- Page {page_number} ---\n{page_text['raw_text']}")
	                for item in page_text['detailed_text']:
	                    item['page'] = page_number
	                    all_detailed_text.append(item)

	        return {
	            'raw_text': '\n\n'.join(all_text),
	            'detailed_text': all_detailed_text,
	            'success': True,
	            'total_pages': len(images),
	        }
	    except Exception as e:
	        return {
	            'raw_text': '',
	            'detailed_text': [],
	            'success': False,
	            'error': str(e),
	        }

	def create_annotated_pdf(original_pdf_path, text_data):
	    """Write a copy of a PDF with translucent green boxes over OCR'd words.

	    Args:
	        original_pdf_path: Path of the source PDF.
	        text_data: OCR result whose 'detailed_text' entries carry a 1-based
	            'page' number and a pixel 'bbox' (x, y, width, height).

	    Returns:
	        Path of the annotated copy, named '<root>_annotated<ext>' next to
	        the source. If anything raises, the error is printed and
	        original_pdf_path is returned.
	    """
	    doc = None
	    output_pdf = None
	    try:
	        doc = fitz.open(original_pdf_path)
	        output_pdf = fitz.open()

	        for page_index in range(len(doc)):
	            page = doc[page_index]
	            page_width = page.rect.width
	            page_height = page.rect.height

	            # Reproduce the original page on a same-sized page of the copy.
	            output_page = output_pdf.new_page(width=page_width, height=page_height)
	            output_page.show_pdf_page(output_page.rect, doc, page_index)

	            # .get() so entries without a 'page' key are skipped, not fatal.
	            page_text_items = [
	                item for item in text_data['detailed_text']
	                if item.get('page') == page_index + 1
	            ]
	            if not page_text_items:
	                # Nothing to highlight: skip the costly page render below.
	                continue

	            # The boxes are in pixels of a rendered page image. Render this
	            # page the same way (convert_from_path with default settings)
	            # to learn the pixel size and map pixels onto page units.
	            images = convert_from_path(
	                original_pdf_path,
	                first_page=page_index + 1,
	                last_page=page_index + 1,
	            )
	            if not images:
	                continue
	            img_width, img_height = images[0].size
	            scale_x = page_width / img_width
	            scale_y = page_height / img_height

	            for item in page_text_items:
	                bbox = item['bbox']
	                rect = fitz.Rect(
	                    bbox['x'] * scale_x,
	                    bbox['y'] * scale_y,
	                    (bbox['x'] + bbox['width']) * scale_x,
	                    (bbox['y'] + bbox['height']) * scale_y,
	                )
	                # Filled green rectangle annotation at 25% opacity.
	                annot = output_page.add_rect_annot(rect)
	                annot.set_colors(stroke=(0, 1, 0), fill=(0, 1, 0))
	                annot.set_opacity(0.25)
	                annot.update()

	        # Derive the output name from the real extension. A literal
	        # replace('.pdf', ...) misses upper-case '.PDF' (the output would
	        # then equal the input path) and rewrites every '.pdf' in the path.
	        root, ext = os.path.splitext(original_pdf_path)
	        annotated_path = f"{root}_annotated{ext}"
	        output_pdf.save(annotated_path)

	        return annotated_path
	    except Exception as e:
	        print(f"Error creating annotated PDF: {str(e)}")
	        return original_pdf_path
	    finally:
	        # Close both documents on success and on failure alike.
	        for pdf in (output_pdf, doc):
	            if pdf is not None:
	                pdf.close()

	@app.route('/', methods=['GET'])
	def home():
	    """Report that the API is running and list its endpoints."""
	    endpoint_help = {
	        'extract': '/extract - POST - Upload image for data extraction',
	        'health': '/ - GET - Health check',
	    }
	    payload = {
	        'message': 'Image Processing API is running',
	        'version': '1.0.0',
	        'endpoints': endpoint_help,
	    }
	    return jsonify(payload)

	@app.route('/extract', methods=['POST'])
	def extract_image_data():
	    """Process an uploaded image or PDF and return the OCR results as JSON.

	    The upload is read from the multipart field 'image'. It is saved under
	    UPLOAD_FOLDER, run through OCR, annotated (boxes for images, highlights
	    for PDFs) and inspected for metadata. Both temporary files are removed
	    before the request ends, whether it succeeds or fails.

	    Returns:
	        200 with 'success', 'timestamp', 'original_filename', 'file_size',
	        'extracted_text', 'metadata' and 'annotated_file_base64';
	        400 when the file is missing, unnamed, too large or of a type that
	        is not allowed; 500 with 'success': False and 'error' when
	        processing raises.
	    """
	    if 'image' not in request.files:
	        return jsonify({'error': 'No file provided'}), 400

	    file = request.files['image']

	    if file.filename == '':
	        return jsonify({'error': 'No file selected'}), 400

	    # Measure the upload by seeking to its end, then rewind for later reads.
	    file.seek(0, os.SEEK_END)
	    file_size = file.tell()
	    file.seek(0)

	    if file_size > MAX_FILE_SIZE:
	        return jsonify({'error': f'File too large. Maximum size is {MAX_FILE_SIZE // (1024*1024)}MB'}), 400

	    if not (file and allowed_file(file.filename)):
	        # sorted() gives the same message on every run; set order varies.
	        return jsonify({
	            'error': f'File type not allowed. Allowed types: {", ".join(sorted(ALLOWED_EXTENSIONS))}'
	        }), 400

	    file_path = None
	    annotated_file_path = None
	    try:
	        # Build the stored name from server-generated parts plus the
	        # allow-listed extension only. The client-supplied name is untrusted
	        # (it may contain path separators or extra dots) and is therefore
	        # kept out of the filesystem path; it is only echoed in the response.
	        extension = file.filename.rsplit('.', 1)[1].lower()
	        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
	        file_hash = hashlib.md5(file.read()).hexdigest()[:8]
	        file.seek(0)  # hashing consumed the stream; rewind before saving

	        file_path = os.path.join(UPLOAD_FOLDER, f"{timestamp}_{file_hash}.{extension}")
	        file.save(file_path)

	        if extension == 'pdf':
	            text_data = extract_text_from_pdf(file_path)
	            annotated_file_path = create_annotated_pdf(file_path, text_data)
	        else:
	            text_data = extract_text_from_image(file_path)
	            annotated_file_path = draw_text_boxes(file_path, text_data)

	        metadata = extract_image_metadata(file_path)

	        with open(annotated_file_path, "rb") as f:
	            file_base64 = base64.b64encode(f.read()).decode('utf-8')

	        return jsonify({
	            'success': True,
	            'timestamp': datetime.now().isoformat(),
	            'original_filename': file.filename,
	            'file_size': file_size,
	            'extracted_text': text_data,
	            'metadata': metadata,
	            'annotated_file_base64': file_base64,
	        })
	    except Exception as e:
	        return jsonify({
	            'success': False,
	            'error': f'Error processing file: {str(e)}'
	        }), 500
	    finally:
	        # The annotators return the input path itself when they fail, so
	        # collect the paths in a set to remove each file only once.
	        for path in {file_path, annotated_file_path}:
	            if path and os.path.exists(path):
	                os.remove(path)

	@app.errorhandler(413)
	def too_large(e):
	    """Answer HTTP 413 errors with a JSON body instead of Flask's default."""
	    body = jsonify({'error': 'File too large'})
	    return body, 413

	@app.errorhandler(500)
	def internal_error(e):
	    """Answer HTTP 500 errors with a generic JSON body."""
	    body = jsonify({'error': 'Internal server error'})
	    return body, 500

	if __name__ == '__main__':
	    # Start Flask's built-in server on all interfaces, port 7860, with
	    # debug mode off, when the file is run as a script.
	    app.run(host='0.0.0.0', port=7860, debug=False)