import json
import os

from llama_cpp import Llama

# Location of the GGUF model file, resolved relative to this source file.
# The file is NOT downloaded automatically; see the instructions printed below.
MODEL_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "../models/llama-3-8b-instruct.Q4_K_M.gguf",
)

# If the model is missing, create the models directory and tell the user where
# to get the file. The module still imports; process_menu_text() then returns
# its fallback result instead of calling the LLM.
if not os.path.exists(MODEL_PATH):
    os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
    print(f"Model not found at {MODEL_PATH}")
    print("Please download the model from https://huggingface.co/TheBloke/Llama-3-8B-Instruct-GGUF")
    print("and place it in the models directory")

# Lazily-initialized model instance shared by all calls (see get_llm()).
llm = None


def get_llm():
    """Return the shared Llama instance, loading it on first use.

    Returns:
        The cached ``Llama`` object, or ``None`` when no model has been
        loaded yet and no file exists at ``MODEL_PATH``.
    """
    global llm
    if llm is None and os.path.exists(MODEL_PATH):
        llm = Llama(
            model_path=MODEL_PATH,
            n_ctx=4096,  # Context window
            n_gpu_layers=-1,  # Use GPU if available
        )
    return llm


def _fallback_result(raw_text, error):
    """Build the failure result: the untouched OCR text plus an error message."""
    return {
        'structured_text': raw_text,
        'menu_sections': [],
        'success': False,
        'error': error,
    }


def _build_prompt(raw_text):
    """Build the instruction prompt asking the LLM to emit the menu as JSON."""
    return f"""
    You are an AI assistant that helps structure menu text from OCR.
    Below is the raw text extracted from a menu image. Please clean it up,
    correct any obvious OCR errors, and structure it properly.
    Identify menu sections, items, and prices.

    RAW MENU TEXT:
    {raw_text}

    Format your response as JSON with the following structure:
    {{
        "menu_sections": [
            {{
                "section_name": "Section name (e.g., Appetizers, Main Course, etc.)",
                "items": [
                    {{
                        "name": "Item name",
                        "description": "Item description if available",
                        "price": "Price if available"
                    }}
                ]
            }}
        ]
    }}

    Only respond with the JSON, nothing else.
    """


def _render_menu(sections):
    """Render parsed menu sections as plain text with underlined headings."""
    parts = []
    for section in sections:
        # The LLM output is untrusted: skip anything that is not an object.
        if not isinstance(section, dict):
            continue
        # `or` also covers an explicit JSON null, which .get()'s default would
        # not; str() keeps len() safe if the model emitted a non-string name.
        heading = str(section.get('section_name') or 'Menu Items')
        parts.append(f"{heading}\n")
        parts.append("-" * len(heading) + "\n\n")

        items = section.get('items')
        for item in items if isinstance(items, list) else []:
            if not isinstance(item, dict):
                continue
            name = item.get('name')
            parts.append("" if name is None else f"{name}")
            if item.get('price'):
                parts.append(f" - {item.get('price')}")
            parts.append("\n")
            if item.get('description'):
                parts.append(f" {item.get('description')}\n")
            parts.append("\n")
        parts.append("\n")
    return "".join(parts)


def process_menu_text(raw_text):
    """
    Process raw OCR text using LLM to improve structure and readability.

    This function does not raise for model or parsing problems; every such
    failure is reported through the returned dict.

    Args:
        raw_text: Raw text extracted from menu image

    Returns:
        A dict. On success it contains ``structured_text`` (the rendered
        menu), ``menu_data`` (the parsed JSON object), ``menu_sections`` (the
        list of sections taken from ``menu_data``) and ``success`` set to
        True. On failure it contains ``structured_text`` (the unchanged
        ``raw_text``), an empty ``menu_sections`` list, ``success`` set to
        False and an ``error`` message.
    """
    try:
        # Loading happens inside the try so that a model-load failure is
        # reported as an error result like every other failure.
        model = get_llm()
        if model is None:
            # Fallback to simple processing if model not available
            return _fallback_result(raw_text, "LLM model not available")

        # NOTE(review): prompt tokens + max_tokens can exceed n_ctx=4096;
        # confirm the installed llama-cpp-python clamps max_tokens rather
        # than raising (either way the error is reported below).
        response = model(
            _build_prompt(raw_text),
            max_tokens=4000,
            temperature=0.1,
            stop=["```"],
        )
        response_text = response['choices'][0]['text'].strip()

        # Take the outermost {...} span so stray text around the JSON is ignored.
        json_start = response_text.find('{')
        json_end = response_text.rfind('}') + 1
        if json_start < 0 or json_end <= json_start:
            return _fallback_result(raw_text, "Failed to parse LLM response as JSON")

        menu_data = json.loads(response_text[json_start:json_end])

        sections = menu_data.get('menu_sections')
        if not isinstance(sections, list):
            sections = []

        return {
            'structured_text': _render_menu(sections),
            'menu_data': menu_data,
            # Same key as the failure results, so callers can read it
            # without checking `success` first.
            'menu_sections': sections,
            'success': True,
        }
    except Exception as e:
        # Boundary handler: the LLM call and JSON parsing can fail in many
        # ways, and callers expect a result dict rather than an exception.
        return _fallback_result(raw_text, str(e))