# BrailleMenuGen / models / text_processor_bk.py
# Uploaded by Chamin09 (commit 87d0988)
from llama_cpp import Llama
import os
import json
# Resolve the GGUF model path relative to this source file.
_MODULE_DIR = os.path.dirname(os.path.abspath(__file__))
MODEL_PATH = os.path.join(_MODULE_DIR, "../models/llama-3-8b-instruct.Q4_K_M.gguf")

# If the weights are missing, make sure the target directory exists and
# tell the user where to get them; the module still imports cleanly.
if not os.path.exists(MODEL_PATH):
    os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
    print(f"Model not found at {MODEL_PATH}")
    print("Please download the model from https://huggingface.co/TheBloke/Llama-3-8B-Instruct-GGUF")
    print("and place it in the models directory")

# Module-level handle; the model itself is loaded lazily by get_llm().
llm = None
def get_llm():
    """Return the shared Llama instance, loading it on first use.

    Returns:
        The module-level ``Llama`` object, or ``None`` when the model
        file is not present on disk.
    """
    global llm
    # Already loaded — reuse the cached instance.
    if llm is not None:
        return llm
    # Lazy load only when the weights file actually exists.
    if os.path.exists(MODEL_PATH):
        llm = Llama(
            model_path=MODEL_PATH,
            n_ctx=4096,       # context window size
            n_gpu_layers=-1,  # offload all layers to GPU when available
        )
    return llm
def _fallback_result(raw_text, error):
    """Build the failure-shaped result dict, passing the raw text through.

    Centralizes the three previously-duplicated fallback dicts so the
    failure shape cannot drift between code paths.
    """
    return {
        'structured_text': raw_text,
        'menu_sections': [],
        'success': False,
        'error': error,
    }


def _build_structured_text(menu_data):
    """Render parsed menu JSON into plain, sectioned text.

    Args:
        menu_data: Dict with a ``menu_sections`` list, each section holding
            ``section_name`` and an ``items`` list of name/description/price
            dicts (all keys optional).

    Returns:
        A single string: underlined section titles followed by their items.
    """
    parts = []
    for section in menu_data.get('menu_sections', []):
        title = section.get('section_name', 'Menu Items')
        parts.append(f"{title}\n")
        parts.append("-" * len(title) + "\n\n")
        for item in section.get('items', []):
            parts.append(f"{item.get('name', '')}")
            if item.get('price'):
                parts.append(f" - {item.get('price')}")
            parts.append("\n")
            if item.get('description'):
                parts.append(f" {item.get('description')}\n")
            parts.append("\n")
        parts.append("\n")
    # join() instead of repeated += — linear instead of quadratic.
    return "".join(parts)


def process_menu_text(raw_text):
    """
    Process raw OCR text using LLM to improve structure and readability.

    Args:
        raw_text: Raw text extracted from menu image.

    Returns:
        Dict with ``structured_text`` and ``success``; on success also
        ``menu_data`` (the parsed JSON), on failure ``menu_sections`` (empty
        list) and ``error`` (message string). The raw text is passed through
        unchanged as ``structured_text`` on every failure path.
    """
    llm = get_llm()
    if llm is None:
        # Model not available — degrade gracefully rather than raise.
        return _fallback_result(raw_text, "LLM model not available")

    # Construct prompt for the LLM. The doubled braces ({{ }}) are literal
    # braces in this f-string, describing the JSON schema to the model.
    prompt = f"""
You are an AI assistant that helps structure menu text from OCR.
Below is the raw text extracted from a menu image.
Please clean it up, correct any obvious OCR errors, and structure it properly.
Identify menu sections, items, and prices.
RAW MENU TEXT:
{raw_text}
Format your response as JSON with the following structure:
{{
"menu_sections": [
{{
"section_name": "Section name (e.g., Appetizers, Main Course, etc.)",
"items": [
{{
"name": "Item name",
"description": "Item description if available",
"price": "Price if available"
}}
]
}}
]
}}
Only respond with the JSON, nothing else.
"""
    try:
        # Low temperature for deterministic extraction; stop on a code fence
        # in case the model tries to wrap its answer in markdown.
        response = llm(prompt, max_tokens=4000, temperature=0.1, stop=["```"])
        response_text = response['choices'][0]['text'].strip()

        # Locate the outermost JSON object in the response text.
        json_start = response_text.find('{')
        json_end = response_text.rfind('}') + 1
        if json_start < 0 or json_end <= json_start:
            return _fallback_result(raw_text, "Failed to parse LLM response as JSON")

        # json.loads failures fall through to the except below, whose str(e)
        # becomes the reported error — same behavior as before.
        menu_data = json.loads(response_text[json_start:json_end])
        return {
            'structured_text': _build_structured_text(menu_data),
            'menu_data': menu_data,
            'success': True,
        }
    except Exception as e:
        return _fallback_result(raw_text, str(e))