# magic-square / app.py
# jy-kong's picture
# Update app.py
# 9d3107f verified
# -*- coding: utf-8 -*-
"""OCR Deepseek_Magic Square.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1zQzTqKB2bF1TborSc3x_SR83gtsXGswk
"""
# Install necessary libraries quietly (-q suppresses output)
#!pip install gradio torch pillow easyocr -q
# Import necessary libraries
import os # Provides access to environment variables and file system operations
import numpy as np # NumPy for numerical operations and array manipulation
from PIL import Image # Image processing library
import easyocr # Optical Character Recognition (OCR) for text extraction from images
import requests # For making HTTP requests (e.g., fetching data from URLs)
import json # Handling JSON data
import gradio as gr # Building interactive web-based applications
import torch # Explicitly import torch
import torchvision # Explicitly import torchvision
# DeepSeek API settings. The key is read from the "deepseek_api" environment
# variable (the Hugging Face environment) and is None when it is unset.
# On Colab the same secret can be read with
# google.colab.userdata.get('deepseek_api') instead.
DEEPSEEK_API_URL = "https://api.deepseek.com/v1/chat/completions"
API_KEY = os.environ.get("deepseek_api")

# Shared EasyOCR reader for English text, created once at import time.
reader = easyocr.Reader(["en"])
def extract_numbers(image_path):
    """Run OCR on an image and lay the detections out as a 3x3 grid.

    Detections are taken in the order the OCR reader returns them and placed
    row by row. Detected text that is not purely digits becomes '?', cells
    without a detection stay '?', and detections past the ninth are ignored.
    """
    side = 3
    total = side * side
    placeholder = '?'

    # detail=1 yields (bounding box, text, confidence) triples; only the text is used.
    ocr_hits = reader.readtext(image_path, detail=1)
    cells = [
        text if text.isdigit() else placeholder
        for _box, text, _score in ocr_hits
    ]

    # Pad with placeholders up to nine cells, then drop any surplus detections.
    cells = (cells + [placeholder] * total)[:total]

    # Cut the flat list into three rows of three.
    return [cells[start:start + side] for start in range(0, total, side)]
def format_puzzle_for_deepseek(grid):
    """Swap each '?' cell for a letter variable, leaving other cells untouched.

    Letters are handed out in reading order (row by row) starting at 'A'.
    Returns a tuple ``(formatted_grid, variables)`` where ``variables`` maps
    every letter that was assigned to '?'.
    """
    variables = {}

    def label_cell(cell):
        # Known cells pass through unchanged.
        if cell != "?":
            return cell
        # The number of letters issued so far selects the next one: A, B, C, ...
        letter = chr(ord("A") + len(variables))
        variables[letter] = "?"
        return letter

    formatted_grid = [[label_cell(cell) for cell in row] for row in grid]
    return formatted_grid, variables
def solve_with_deepseek(formatted_grid, variables):
    """Send the formatted puzzle to DeepSeek-Reasoner and return its answer text.

    Args:
        formatted_grid: Rows of cells, with known values and letter
            placeholders for unknown ones. Each cell is converted with
            ``str`` before being joined into the prompt.
        variables: Mapping of placeholder letters that accompanies the grid.
            It is not used to build the request; it is accepted so existing
            callers keep working.

    Returns:
        The model's reply text, or a string starting with "Error:" when the
        API key is missing, the request fails, the body is not valid JSON,
        or the reply carries no usable content.
    """
    no_content = "Error: No response content."

    # Fail early instead of sending "Bearer None" to the API.
    if not API_KEY:
        return "Error: DeepSeek API key is not configured. Set the 'deepseek_api' environment variable."

    # One tab-separated line per grid row.
    puzzle_text = "\n".join("\t".join(str(cell) for cell in row) for row in formatted_grid)
    prompt = (
        "You are an AI specialized in solving magic square puzzles. Analyze the following grid, where missing values are already "
        "assigned as variables (e.g., A, B, C, D, E). Provide the correct solution with step-by-step reasoning. Ensure the sum of "
        "each row, column, and diagonal is the same. Duplicates are not allowed in any row, column, or diagonal.\n\n"
        f"Given Question:\n{puzzle_text}\n\n"
        "The missing values are represented as variables (e.g., A, B, C, D, E). Provide a structured solution."
        "\nFormat your response strictly as follows:\n"
        "1. **Given Question**:\n - (grid format with missing values as variables)\n"
        "2. **Step-by-step Reasoning**:\n - Identify the missing values logically\n - Explain the rules used\n - Calculate step-by-step\n"
        "3. **Final Answer**:\n - Completed grid\n"
        "4. **Validation**:\n - Verify all row/column sums\n"
    )
    data = {
        "model": "deepseek-reasoner",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0,
        "max_tokens": 2000,
    }
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }

    try:
        response = requests.post(DEEPSEEK_API_URL, headers=headers, json=data, timeout=300)
        response.raise_for_status()
        result = response.json()
    except requests.exceptions.Timeout:
        return "Error: DeepSeek API request timed out. Please try again."
    except requests.exceptions.RequestException as e:
        return f"Error: DeepSeek API failed: {str(e)}"
    except ValueError as e:
        # Older versions of requests raise a plain ValueError for a non-JSON body.
        return f"Error: DeepSeek API returned invalid JSON: {e}"

    # Walk the response defensively: an empty "choices" list or a null
    # message/content must produce the error text, not an exception or None.
    if not isinstance(result, dict):
        return no_content
    choices = result.get("choices")
    first_choice = choices[0] if isinstance(choices, list) and choices else {}
    message = first_choice.get("message") if isinstance(first_choice, dict) else None
    content = message.get("content") if isinstance(message, dict) else None
    return content if content else no_content
def solve_magic_square(image):
    """Extract a 3x3 puzzle from an uploaded image and return the solver's reply.

    Args:
        image: The uploaded PIL image, or None when nothing was uploaded.

    Returns:
        The text returned by ``solve_with_deepseek``, or an "Error: ..."
        message when no image was supplied.
    """
    import tempfile  # local import: only this function needs it

    # Without an upload there is nothing to save or read.
    if image is None:
        return "Error: No image provided. Please upload an image of a 3x3 magic square puzzle."

    # A per-call temporary directory keeps concurrent requests from overwriting
    # each other's upload, and removes the file once OCR has finished.
    with tempfile.TemporaryDirectory() as work_dir:
        image_path = os.path.join(work_dir, "uploaded_magic_square.png")
        image.save(image_path)
        extracted_grid = extract_numbers(image_path)

    formatted_grid, variables = format_puzzle_for_deepseek(extracted_grid)
    return solve_with_deepseek(formatted_grid, variables)
# Text shown under the title: what to upload and the rules the solver assumes.
_INTERFACE_DESCRIPTION = (
    "Upload an image of a 3x3 Magic Square puzzle. \n\n"
    "- A Magic Square is a 3x3 grid where the sum of each row, column, and diagonal is the same. \n"
    "- The puzzle involves solving for missing values represented by variables (e.g., A, B, C, D, E). \n"
    "- Please note that this tool only works with 3x3 puzzles. Duplicates are not allowed in any row, column, or diagonal.\n"
)

# Gradio UI: one PIL image in, the solver's text response out.
iface = gr.Interface(
    fn=solve_magic_square,
    inputs=gr.Image(type="pil"),
    outputs=gr.Textbox(),
    title="Magic Square Puzzle Solver",
    description=_INTERFACE_DESCRIPTION,
    allow_flagging="never",  # flagging is switched off
)

# Start the web app with debug output enabled.
iface.launch(debug=True)