# BrailleMenuGen / models / text_processor_bk.py
# Uploaded by Chamin09 (commit 87d0988)
from llama_cpp import Llama
import os
import json
# Resolve the GGUF model path relative to this source file.
_MODULE_DIR = os.path.dirname(os.path.abspath(__file__))
MODEL_PATH = os.path.join(_MODULE_DIR, "../models/llama-3-8b-instruct.Q4_K_M.gguf")

# If the weights are missing, make sure the target directory exists and
# tell the user where to get them; the module still imports cleanly.
if not os.path.exists(MODEL_PATH):
    os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
    print(f"Model not found at {MODEL_PATH}")
    print("Please download the model from https://huggingface.co/TheBloke/Llama-3-8B-Instruct-GGUF")
    print("and place it in the models directory")

# Module-level handle; the model itself is loaded lazily by get_llm().
llm = None
def get_llm():
    """Return the shared Llama instance, loading it on first use.

    Returns:
        The module-level ``Llama`` object, or ``None`` when the model
        file is not present on disk.
    """
    global llm
    # Already loaded — reuse the cached instance.
    if llm is not None:
        return llm
    # Lazy load only when the weights file actually exists.
    if os.path.exists(MODEL_PATH):
        llm = Llama(
            model_path=MODEL_PATH,
            n_ctx=4096,       # context window size
            n_gpu_layers=-1,  # offload all layers to GPU when available
        )
    return llm
def _fallback_result(raw_text, error):
    """Build the failure-shaped result dict, passing the raw text through.

    Centralizes the three previously-duplicated fallback dicts so the
    failure shape cannot drift between code paths.
    """
    return {
        'structured_text': raw_text,
        'menu_sections': [],
        'success': False,
        'error': error,
    }


def _build_structured_text(menu_data):
    """Render parsed menu JSON into plain, sectioned text.

    Args:
        menu_data: Dict with a ``menu_sections`` list, each section holding
            ``section_name`` and an ``items`` list of name/description/price
            dicts (all keys optional).

    Returns:
        A single string: underlined section titles followed by their items.
    """
    parts = []
    for section in menu_data.get('menu_sections', []):
        title = section.get('section_name', 'Menu Items')
        parts.append(f"{title}\n")
        parts.append("-" * len(title) + "\n\n")
        for item in section.get('items', []):
            parts.append(f"{item.get('name', '')}")
            if item.get('price'):
                parts.append(f" - {item.get('price')}")
            parts.append("\n")
            if item.get('description'):
                parts.append(f" {item.get('description')}\n")
            parts.append("\n")
        parts.append("\n")
    # join() instead of repeated += — linear instead of quadratic.
    return "".join(parts)


def process_menu_text(raw_text):
    """
    Process raw OCR text using LLM to improve structure and readability.

    Args:
        raw_text: Raw text extracted from menu image.

    Returns:
        Dict with ``structured_text`` and ``success``; on success also
        ``menu_data`` (the parsed JSON), on failure ``menu_sections`` (empty
        list) and ``error`` (message string). The raw text is passed through
        unchanged as ``structured_text`` on every failure path.
    """
    llm = get_llm()
    if llm is None:
        # Model not available — degrade gracefully rather than raise.
        return _fallback_result(raw_text, "LLM model not available")

    # Construct prompt for the LLM. The doubled braces ({{ }}) are literal
    # braces in this f-string, describing the JSON schema to the model.
    prompt = f"""
You are an AI assistant that helps structure menu text from OCR.
Below is the raw text extracted from a menu image.
Please clean it up, correct any obvious OCR errors, and structure it properly.
Identify menu sections, items, and prices.
RAW MENU TEXT:
{raw_text}
Format your response as JSON with the following structure:
{{
"menu_sections": [
{{
"section_name": "Section name (e.g., Appetizers, Main Course, etc.)",
"items": [
{{
"name": "Item name",
"description": "Item description if available",
"price": "Price if available"
}}
]
}}
]
}}
Only respond with the JSON, nothing else.
"""
    try:
        # Low temperature for deterministic extraction; stop on a code fence
        # in case the model tries to wrap its answer in markdown.
        response = llm(prompt, max_tokens=4000, temperature=0.1, stop=["```"])
        response_text = response['choices'][0]['text'].strip()

        # Locate the outermost JSON object in the response text.
        json_start = response_text.find('{')
        json_end = response_text.rfind('}') + 1
        if json_start < 0 or json_end <= json_start:
            return _fallback_result(raw_text, "Failed to parse LLM response as JSON")

        # json.loads failures fall through to the except below, whose str(e)
        # becomes the reported error — same behavior as before.
        menu_data = json.loads(response_text[json_start:json_end])
        return {
            'structured_text': _build_structured_text(menu_data),
            'menu_data': menu_data,
            'success': True,
        }
    except Exception as e:
        return _fallback_result(raw_text, str(e))