Spaces:

mgbam
/

builder

Running

App Files Files Community

builder / file_processing.py

mgbam

Upload 6 files

51d8a3f verified 9 months ago

raw

history blame

3.04 kB

	import os
	import mimetypes
	import PyPDF2
	import docx
	import cv2
	import numpy as np
	from PIL import Image
	import pytesseract

	def process_image_for_model(image):
	    """Encode an image as a base64 PNG data URL for model input.

	    Args:
	        image: An object with a PIL-style ``save(buffer, format=...)``
	            method, a numpy array (converted with ``Image.fromarray``),
	            or ``None``.

	    Returns:
	        A ``"data:image/png;base64,..."`` string, or ``None`` when
	        ``image`` is ``None``.
	    """
	    if image is None:
	        return None

	    import base64
	    import io

	    # Handle numpy array from Gradio
	    if isinstance(image, np.ndarray):
	        image = Image.fromarray(image)

	    buffer = io.BytesIO()
	    try:
	        image.save(buffer, format="PNG")
	    except OSError:
	        # The PNG writer rejects some image modes (e.g. CMYK) with OSError.
	        # Retry once with an RGB copy, using a fresh buffer so no partial
	        # output from the failed attempt can end up in the result.
	        buffer = io.BytesIO()
	        image.convert("RGB").save(buffer, format="PNG")

	    encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
	    return f"data:image/png;base64,{encoded}"

	def extract_text_from_image(image_path):
	    """Extract text from an image file using Tesseract OCR.

	    The image is reduced to grayscale and binarized with Otsu's threshold
	    before being handed to ``pytesseract.image_to_string``.

	    Args:
	        image_path: Path of the image file to read.

	    Returns:
	        The recognized text with surrounding whitespace stripped,
	        ``"No text found in image"`` when OCR produces nothing, or a
	        message starting with ``"Error"`` when the image could not be
	        read or OCR failed. Failures are reported through the return
	        value rather than raised.
	    """
	    try:
	        # Check if tesseract is available
	        try:
	            pytesseract.get_tesseract_version()
	        except Exception:
	            return "Error: Tesseract OCR is not installed. Please install Tesseract to extract text from images. See install_tesseract.md for instructions."

	        image = cv2.imread(image_path)
	        if image is not None:
	            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	        else:
	            # cv2.imread returns None (it does not raise) for files it
	            # cannot decode. Try Pillow before giving up, so formats the
	            # OpenCV build lacks a decoder for can still be processed.
	            try:
	                with Image.open(image_path) as pil_image:
	                    gray = np.array(pil_image.convert("L"))
	            except (OSError, ValueError):
	                return "Error: Could not read image file"

	        _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
	        text = pytesseract.image_to_string(binary, config='--psm 6').strip()
	        return text if text else "No text found in image"

	    except Exception as e:
	        return f"Error extracting text from image: {e}"

	def extract_text_from_file(file_path):
	    """Return the text content of a file, dispatching on its extension.

	    Supported extensions (case-insensitive): ``.pdf`` (PyPDF2), ``.txt`` /
	    ``.md`` / ``.csv`` (read as text), ``.docx`` (python-docx paragraphs)
	    and common image formats (OCR via ``extract_text_from_image``).

	    Args:
	        file_path: Path of the file to read; a falsy value yields ``""``.

	    Returns:
	        The extracted text, ``""`` for a missing path or an unsupported
	        extension, or ``"Error extracting text: ..."`` when reading
	        fails. Failures are reported through the return value rather
	        than raised.
	    """
	    if not file_path:
	        return ""
	    ext = os.path.splitext(file_path)[1].lower()
	    try:
	        if ext == ".pdf":
	            with open(file_path, "rb") as f:
	                reader = PyPDF2.PdfReader(f)
	                return "\n".join(page.extract_text() or "" for page in reader.pages)
	        elif ext in (".txt", ".md", ".csv"):
	            # "utf-8-sig" strips a leading byte-order mark if present,
	            # so it does not leak into the text as "\ufeff".
	            # errors="replace" keeps files in legacy encodings readable
	            # instead of collapsing the whole file into an error string.
	            with open(file_path, "r", encoding="utf-8-sig", errors="replace") as f:
	                return f.read()
	        elif ext == ".docx":
	            doc = docx.Document(file_path)
	            return "\n".join(para.text for para in doc.paragraphs)
	        elif ext in (".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".gif", ".webp"):
	            return extract_text_from_image(file_path)
	        else:
	            return ""
	    except Exception as e:
	        return f"Error extracting text: {e}"

	def create_multimodal_message(text, image=None):
	    """Build a user-role chat message from text and an optional image.

	    Args:
	        text: The message text.
	        image: Optional image; when given it is passed to
	            ``process_image_for_model`` and attached as an ``image_url``
	            content part.

	    Returns:
	        A dict with ``"role": "user"``. Without an image, ``"content"`` is
	        ``text`` itself; with one, it is a two-item list holding a text
	        part followed by an image part.
	    """
	    if image is not None:
	        text_part = {"type": "text", "text": text}
	        image_part = {
	            "type": "image_url",
	            "image_url": {"url": process_image_for_model(image)},
	        }
	        return {"role": "user", "content": [text_part, image_part]}

	    # Text-only message: the content stays a plain string.
	    return {"role": "user", "content": text}