Spaces:
Sleeping
Sleeping
| import os | |
| import io | |
| import base64 | |
| from PIL import Image | |
| import gradio as gr | |
| from openai import OpenAI | |
| from typing import Optional, Tuple | |
def crop_image(image_path: str, top_left: Tuple[int, int], bottom_right: Tuple[int, int], output_path: Optional[str] = None) -> Image.Image:
    """
    Crop an image using pixel coordinates.

    Args:
        image_path: Path to the input image file
        top_left: Tuple of (x, y) coordinates for the top-left corner of the crop area
        bottom_right: Tuple of (x, y) coordinates for the bottom-right corner of the crop area
        output_path: Optional path to save the cropped image. If None, returns the cropped image.

    Returns:
        PIL Image object of the cropped image

    Raises:
        FileNotFoundError: If the input image file doesn't exist
        ValueError: If coordinates are invalid, or the image cannot be opened/saved
    """
    # Validate input file exists
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image file not found: {image_path}")

    # Open the image; keep only the open() call inside the try so validation
    # errors below are not re-wrapped as "Failed to open image".
    try:
        source = Image.open(image_path)
    except Exception as e:
        raise ValueError(f"Failed to open image: {e}")

    # FIX: the original never closed the file handle held by Image.open().
    # Crop inside a context manager; image.crop() returns an independent
    # copy, so the source file can be released immediately afterwards.
    with source as image:
        # Extract coordinates
        x1, y1 = top_left
        x2, y2 = bottom_right

        # Validate coordinates against the actual image dimensions
        width, height = image.size
        if x1 < 0 or y1 < 0 or x2 > width or y2 > height:
            raise ValueError(f"Coordinates out of bounds. Image size: {width}x{height}, requested crop: ({x1},{y1}) to ({x2},{y2})")
        if x2 <= x1 or y2 <= y1:
            raise ValueError(f"Invalid crop dimensions. Bottom-right must be greater than top-left: ({x1},{y1}) to ({x2},{y2})")

        # PIL crop box format: (left, upper, right, lower)
        cropped_image = image.crop((x1, y1, x2, y2))

    # Save if output path provided
    if output_path:
        try:
            cropped_image.save(output_path)
            print(f"Cropped image saved to: {output_path}")
        except Exception as e:
            raise ValueError(f"Failed to save cropped image: {e}")
    return cropped_image
# Initialize OpenAI client with error handling
def get_openai_client():
    """Locate an OpenAI API key and build a client, or return None.

    Lookup order: OPENAI_API_KEY environment variable (usual on Hugging
    Face Spaces), the Spaces secrets JSON file, then a local .env file
    as a development fallback.
    """
    # Method 1: Environment variable (most common in Hugging Face Spaces)
    key = os.getenv("OPENAI_API_KEY")

    # Method 2: Hugging Face Spaces may expose secrets as a JSON file.
    if not key:
        try:
            spaces_secrets = "/app/secrets.json"  # Hugging Face Spaces secrets location
            if os.path.exists(spaces_secrets):
                import json
                with open(spaces_secrets, 'r') as fh:
                    key = json.load(fh).get("OPENAI_API_KEY")
        except Exception as e:
            print(f"Could not read secrets file: {e}")

    # Method 3: Local development — pull variables from a .env file.
    if not key:
        try:
            from dotenv import load_dotenv
            load_dotenv()  # Load .env file from current directory
            key = os.getenv("OPENAI_API_KEY")
            if key:
                print("Loaded API key from .env file (local development)")
        except ImportError:
            print("python-dotenv not installed, skipping .env file loading")
        except Exception as e:
            print(f"Could not load .env file: {e}")

    # Debug logging + early exit when nothing was found.
    if not key:
        print("No API key found in environment variables, secrets, or .env file")
        return None
    print(f"Found API key (length: {len(key)})")

    try:
        instance = OpenAI(api_key=key.strip())  # Strip any whitespace
        print("OpenAI client initialized successfully")
        return instance
    except Exception as e:
        print(f"Failed to initialize OpenAI client: {e}")
        return None
# Module-level singleton; every endpoint/helper checks `client` for None
# before making API calls, so a missing key degrades to error responses.
client = get_openai_client()
def process_image_with_gpt4(image_data: str, crop_region: Optional[Tuple[Tuple[int, int], Tuple[int, int]]] = None) -> str:
    """
    Process image using GPT-4 vision API for OCR/text extraction

    Args:
        image_data: Base64 encoded image data (a full data: URL is also accepted)
        crop_region: Optional tuple of ((x1, y1), (x2, y2)) coordinates to crop the image before OCR

    Returns:
        The extracted text, or a string starting with "Error" on any failure.
        Callers rely on the "Error" prefix to detect failure — keep it stable.
    """
    try:
        if not client:
            return "Error: OpenAI client not initialized - check API key"
        print(f"Processing image data of length: {len(image_data)}")
        # Handle data URL format (data:image/png;base64,...)
        if image_data.startswith('data:'):
            # Extract the base64 part after the comma
            if ',' in image_data:
                image_data = image_data.split(',')[1]
            else:
                return "Error: Invalid data URL format - no comma found"
        # Validate base64 before touching PIL or the API
        try:
            image_bytes = base64.b64decode(image_data)
            print(f"Decoded image bytes length: {len(image_bytes)}")
        except Exception as decode_error:
            return f"Error: Failed to decode base64 - {str(decode_error)}"
        # Verify it's a valid image
        try:
            image = Image.open(io.BytesIO(image_bytes))
            print(f"Image validated: {image.size}, mode: {image.mode}")
        except Exception as image_error:
            return f"Error: Failed to validate image - {str(image_error)}"
        # Crop image if crop_region is specified. Cropping is best-effort:
        # any failure logs a warning and falls back to the original image.
        if crop_region:
            try:
                top_left, bottom_right = crop_region
                print(f"Applying crop with coordinates: {top_left} to {bottom_right}")
                # PIL crop box format: (left, upper, right, lower)
                crop_box = (top_left[0], top_left[1], bottom_right[0], bottom_right[1])
                # Validate crop coordinates
                width, height = image.size
                x1, y1, x2, y2 = crop_box
                if x1 < 0 or y1 < 0 or x2 > width or y2 > height:
                    print(f"Warning: Crop coordinates partially out of bounds. Image size: {width}x{height}, requested crop: {crop_box}")
                    # Adjust coordinates to stay within bounds (clamp, don't fail)
                    x1 = max(0, min(x1, width))
                    y1 = max(0, min(y1, height))
                    x2 = max(0, min(x2, width))
                    y2 = max(0, min(y2, height))
                    crop_box = (x1, y1, x2, y2)
                if x2 > x1 and y2 > y1:
                    original_size = image.size
                    image = image.crop(crop_box)
                    print(f"Image cropped from {original_size} to {image.size}")
                    # Re-encode so the API call below sends the cropped
                    # pixels, not the original base64 payload.
                    buffered = io.BytesIO()
                    image.save(buffered, format="PNG")
                    image_data = base64.b64encode(buffered.getvalue()).decode()
                else:
                    print(f"Warning: Invalid crop dimensions, skipping crop: {crop_box}")
            except Exception as crop_error:
                print(f"Warning: Failed to crop image: {crop_error}")
                # Continue with original image
        # Save image for local debugging if LOCAL_DEBUG is enabled
        is_local_debug = os.getenv("LOCAL_DEBUG", "false").lower() == "true"
        if is_local_debug:
            try:
                import datetime
                timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
                debug_filename = f"debug_input_{timestamp}.png"
                # Decode image_data again (it may now be the cropped version)
                Image.open(io.BytesIO(base64.b64decode(image_data))).save(debug_filename)
                print(f"🐛 DEBUG: Saved input image as {debug_filename}")
            except Exception as save_error:
                print(f"Could not save debug image: {save_error}")
        # Process with GPT-4 Vision
        try:
            print("Calling GPT-4 Vision API...")
            response = client.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/png;base64,{image_data}"
                                }
                            },
                            {
                                "type": "text",
                                "text": "Extract all text from this image. Include every word, number, symbol, and mathematical notation. If there are multiple questions or sections, preserve the structure. Use LaTeX for mathematical expressions."
                            }
                        ]
                    }
                ],
                max_tokens=1000,
                temperature=0.3
            )
            extracted_text = response.choices[0].message.content.strip()
            print(f"GPT-4 Vision extraction completed. Text length: {len(extracted_text)}")
            if is_local_debug:
                print(f"🐛 DEBUG: Extracted text: '{extracted_text}'")
            return extracted_text
        except Exception as ocr_error:
            error_msg = f"Error: GPT-4 Vision processing failed - {str(ocr_error)}"
            print(f"❌ {error_msg}")
            import traceback
            print(f"❌ Full traceback: {traceback.format_exc()}")
            return error_msg
    except Exception as e:
        # Catch-all boundary so callers always get the "Error" string protocol
        return f"Error: Unexpected error - {str(e)}"
def compare_ocr_with_crop(image_data: str, crop_region: Tuple[Tuple[int, int], Tuple[int, int]]) -> dict:
    """
    Run GPT-4 Vision OCR twice — once on the full image and once on the
    cropped region — and report how the two extractions differ.

    Args:
        image_data: Base64 encoded image data
        crop_region: Tuple of ((x1, y1), (x2, y2)) coordinates for cropping

    Returns:
        Dictionary with both raw texts, their lengths, error flags, and a
        comparison summary.
    """
    print("🔍 Starting OCR comparison: cropped vs uncropped (GPT-4 Vision)")
    print(f"📏 Crop region: {crop_region[0]} to {crop_region[1]}")

    # First pass: whole image, no cropping.
    print("\n📄 Processing uncropped image...")
    full_text = process_image_with_gpt4(image_data, crop_region=None)

    # Second pass: only the requested region.
    print("\n✂️ Processing cropped image...")
    region_text = process_image_with_gpt4(image_data, crop_region=crop_region)

    # Derive the comparison figures once, up front.
    full_failed = full_text.startswith("Error")
    region_failed = region_text.startswith("Error")
    both_ok = not (full_failed or region_failed)
    identical = full_text == region_text
    delta = len(region_text) - len(full_text)
    region_longer = len(region_text) > len(full_text)

    results = {
        "crop_region": crop_region,
        "uncropped": {
            "text": full_text,
            "length": len(full_text),
            "is_error": full_failed
        },
        "cropped": {
            "text": region_text,
            "length": len(region_text),
            "is_error": region_failed
        },
        "comparison": {
            "texts_match": identical,
            "length_difference": delta,
            "cropped_has_more_text": region_longer,
            "both_successful": both_ok
        }
    }

    # Human-readable summary for the logs.
    print("\n📊 OCR Comparison Results:")
    print(f"Uncropped text length: {len(full_text)} characters")
    print(f"Cropped text length: {len(region_text)} characters")
    print(f"Length difference: {delta} characters")
    print(f"Texts match exactly: {identical}")
    print(f"Both OCR operations successful: {both_ok}")

    if both_ok:
        if identical:
            print("✅ Cropped and uncropped images produced identical OCR results")
        elif region_longer:
            print("📈 Cropped image produced more text than uncropped image")
        else:
            print("📉 Cropped image produced less text than uncropped image")
    else:
        print("❌ One or both OCR operations failed")
    return results
def get_gpt_answer(question: str) -> str:
    """
    Send the extracted question text to GPT-4 and return a concise answer.

    Returns the model's reply, or a string starting with "Error" on failure.
    """
    try:
        # Guard clauses: no client / no key means we cannot call the API.
        if not client:
            return "Error: OpenAI client not initialized - check API key configuration"
        if not hasattr(client, 'api_key') or not client.api_key:
            return "Error: OpenAI API key not configured"

        # Same prompt and parameters as always, just assembled up front.
        chat_messages = [
            {
                "role": "system",
                "content": "You are a helpful assistant that provides concise, accurate answers to academic questions. Keep responses brief but informative."
            },
            {
                "role": "user",
                "content": f"Please answer this question concisely: {question}"
            }
        ]
        completion = client.chat.completions.create(
            model="gpt-4",
            messages=chat_messages,
            max_tokens=500,
            temperature=0.3
        )
        return completion.choices[0].message.content.strip()
    except Exception as e:
        return f"Error: {str(e)}"
def process_screenshot_with_crop_comparison(image_data: str, crop_region: Tuple[Tuple[int, int], Tuple[int, int]]) -> dict:
    """
    Full pipeline with crop comparison: OCR both the cropped and uncropped
    versions, get GPT answers for each successful extraction, and compare.

    Args:
        image_data: Base64 encoded image data
        crop_region: Tuple of ((x1, y1), (x2, y2)) coordinates for cropping

    Returns:
        Dictionary containing OCR and GPT results for both versions
    """
    print("🎯 Starting screenshot processing with crop comparison (GPT-4 Vision)")
    if not image_data:
        return {"error": "No image data provided"}

    ocr_comparison = compare_ocr_with_crop(image_data, crop_region)

    # Ask GPT for an answer wherever OCR succeeded (uncropped first,
    # matching the original log order).
    gpt_answers = {}
    for variant in ("uncropped", "cropped"):
        if not ocr_comparison[variant]["is_error"]:
            print(f"\n🤖 Getting GPT answer for {variant} text...")
            gpt_answers[variant] = get_gpt_answer(ocr_comparison[variant]["text"])

    result = {
        "crop_region": crop_region,
        "ocr_comparison": ocr_comparison,
        "gpt_answers": gpt_answers,
        "success": True
    }

    # Compare answers only when both variants produced one.
    if "uncropped" in gpt_answers and "cropped" in gpt_answers:
        answers_match = gpt_answers["uncropped"] == gpt_answers["cropped"]
        result["comparison"] = {
            "gpt_answers_match": answers_match,
            "ocr_texts_match": ocr_comparison["comparison"]["texts_match"]
        }
        print(f"\n🎯 Final Comparison:")
        print(f"OCR texts match: {ocr_comparison['comparison']['texts_match']}")
        print(f"GPT answers match: {answers_match}")
        if answers_match:
            print("✅ Cropped and uncropped processing produced identical results")
        else:
            print("⚠️ Cropped and uncropped processing produced different results")
    return result
def process_screenshot(image_data: str) -> dict:
    """
    Main pipeline: OCR the screenshot with GPT-4 Vision, then answer it.

    Returns a dict with an "error" key on failure, otherwise the extracted
    text, the GPT answer, and success=True.
    """
    if not image_data:
        return {"error": "No image data provided"}

    # Extract text from image using GPT-4 Vision; the "Error" prefix is
    # the OCR helper's failure signal.
    ocr_text = process_image_with_gpt4(image_data)
    if ocr_text.startswith("Error"):
        return {"error": ocr_text}

    return {
        "extracted_text": ocr_text,
        "answer": get_gpt_answer(ocr_text),
        "success": True
    }
# Gradio interface for testing
def gradio_interface(image):
    """
    Gradio entry point: PNG-encode the uploaded PIL image, run the OCR +
    answer pipeline, and return (extracted_text, answer) for the two boxes.
    """
    if image is None:
        return "No image provided", "No answer available"

    # Wrap the PIL image in base64 PNG, the format the pipeline expects.
    png_buffer = io.BytesIO()
    image.save(png_buffer, format="PNG")
    encoded = base64.b64encode(png_buffer.getvalue()).decode()

    outcome = process_screenshot(encoded)
    if "error" in outcome:
        return outcome["error"], "Error occurred"
    return outcome["extracted_text"], outcome["answer"]
# Create Gradio app
# Simple manual-test UI: upload a screenshot, see the OCR text and GPT answer.
# Mounted under /gradio further below; the JSON API is served separately.
demo = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Image(type="pil", label="Upload Screenshot"),
    outputs=[
        gr.Textbox(label="Extracted Text", lines=3),
        gr.Textbox(label="GPT Answer", lines=5)
    ],
    title="Educational Question Solver",
    description="Upload a screenshot of a question and get an AI-powered answer for educational purposes.",
    examples=None
)
# FastAPI setup for proper API endpoints
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import uvicorn

app = FastAPI(title="Educational Question Solver API")
# Add CORS middleware so a browser/Electron client on any origin can call us.
# NOTE(review): allow_origins=["*"] together with allow_credentials=True is
# very permissive — confirm this is acceptable for the deployment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
class ImageRequest(BaseModel):
    """Request body for the image-processing endpoint."""
    # Base64-encoded screenshot; a full data: URL is also accepted downstream.
    image_data: str
# FIX: this handler was defined but never registered with the app, so the
# endpoint did not exist. Path is a best guess — TODO confirm the path the
# Electron client actually calls.
@app.post("/process")
async def process_image_endpoint(request: ImageRequest):
    """
    API endpoint that the Electron app will call.

    Args:
        request: JSON body carrying the base64-encoded screenshot.

    Returns:
        The process_screenshot() result dict (extracted text + answer,
        or an "error" key).

    Raises:
        HTTPException: 500 with the error message on unexpected failure.
    """
    try:
        result = process_screenshot(request.image_data)
        return result
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
# FIX: handler was defined but never registered with the app.
@app.get("/")
async def root():
    """Liveness message for the API root."""
    return {"message": "Educational Question Solver API is running"}
# FIX: handler was defined but never registered with the app.
@app.get("/health")
async def health_check():
    """
    Health/status report: client availability, OCR model name, debug flag,
    and the length (never the value) of the configured API key.
    """
    return {
        "status": "healthy",
        "openai_client": client is not None,
        "ocr_model": "gpt-4o-vision",
        "local_debug": os.getenv("LOCAL_DEBUG", "false").lower() == "true",
        "openai_api_key_length": len(os.getenv("OPENAI_API_KEY", "")) if os.getenv("OPENAI_API_KEY") else 0
    }
# FIX: handler was defined but never registered with the app.
@app.get("/test-openai")
async def test_openai():
    """Test endpoint to check OpenAI functionality."""
    if not client:
        return {"error": "OpenAI client not initialized"}
    try:
        # Cheap round-trip to verify the API key actually works.
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Say 'Hello'"}],
            max_tokens=10
        )
        return {
            "success": True,
            "response": response.choices[0].message.content.strip()
        }
    except Exception as e:
        return {"error": f"OpenAI test failed: {str(e)}"}
# FIX: handler was defined but never registered with the app.
@app.get("/test-vision")
async def test_vision():
    """Test endpoint to check GPT-4 Vision functionality."""
    if not client:
        return {"error": "OpenAI client not initialized"}
    try:
        # Create a simple in-memory test image with known text.
        from PIL import Image, ImageDraw
        test_image = Image.new('RGB', (200, 100), color='white')
        draw = ImageDraw.Draw(test_image)
        draw.text((10, 10), "TEST IMAGE", fill='black')

        # Convert to base64 so it can be sent as a data URL.
        buffered = io.BytesIO()
        test_image.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode()

        # Round-trip through GPT-4 Vision and report what it saw.
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/png;base64,{img_str}"
                            }
                        },
                        {
                            "type": "text",
                            "text": "What text do you see in this image?"
                        }
                    ]
                }
            ],
            max_tokens=100
        )
        return {
            "success": True,
            "response": response.choices[0].message.content.strip()
        }
    except Exception as e:
        return {"error": f"Vision test failed: {str(e)}"}
# Mount Gradio app
# The Gradio test UI lives under /gradio; the JSON API keeps the root paths.
app = gr.mount_gradio_app(app, demo, path="/gradio")

if __name__ == "__main__":
    # For local testing; port 7860 is the Hugging Face Spaces default.
    uvicorn.run(app, host="0.0.0.0", port=7860)