Spaces:
Running
Running
File size: 11,458 Bytes
92a22cd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 |
import json
import os
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from jsonschema import validate, ValidationError
import uuid
from flask import current_app, url_for
from werkzeug.utils import secure_filename
import sqlite3 # Import sqlite3
import sys
# Ensure current directory is in Python path for local imports
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from database import get_db_connection
from utils import create_a4_pdf_from_images
# JSON v3.0 Schema for validation.
# Consumed by jsonschema.validate() in JSONProcessorV3.validate(); any payload
# missing a key listed in a "required" array is rejected before DB insertion.
JSON_V3_SCHEMA = {
    "type": "object",
    "properties": {
        # Version is pinned: only payloads declaring exactly "3.0" pass.
        "version": {"type": "string", "const": "3.0"},
        "source": {"type": "string"},
        "test_name": {"type": "string"},
        "test_id": {"type": "string"},
        "test_mapping_id": {"type": "string"},
        # Free-form metadata object; stored as a JSON string in the sessions table.
        "metadata": {"type": "object"},
        "config": {
            "type": "object",
            "properties": {
                "statuses_to_include": {"type": "array", "items": {"type": "string"}},
                # Layout drives PDF generation (see create_a4_pdf_from_images call).
                "layout": {
                    "type": "object",
                    "properties": {
                        "images_per_page": {"type": "integer"},
                        "orientation": {"type": "string"}
                    },
                    "required": ["images_per_page", "orientation"]
                }
            },
            "required": ["statuses_to_include", "layout"]
        },
        "questions": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "question_number": {"type": "string"},
                    "image_url": {"type": "string", "format": "uri"},
                    "status": {"type": "string"},
                    "marked_solution": {"type": "string"},
                    "correct_solution": {"type": "string"},
                    # subject is required below, but chapter/topic stay optional.
                    "subject": {"type": "string"},
                    "chapter": {"type": "string"},
                    "topic": {"type": "string"},
                    "time_taken": {"type": "integer"}
                },
                "required": ["question_number", "image_url", "status", "marked_solution", "correct_solution", "subject", "time_taken"]
            }
        },
        # view=True triggers automatic PDF generation; False leaves the
        # session open for manual review.
        "view": {"type": "boolean"}
    },
    "required": ["version", "source", "test_name", "test_id", "test_mapping_id", "config", "questions", "view"]
}
class JSONProcessorV3:
    """Processor for JSON v3.0 test-result payloads.

    Pipeline: validate the payload against JSON_V3_SCHEMA, download all
    question images in parallel, persist a session plus per-question rows
    to SQLite, and — when the payload requests ``view`` mode — render an
    A4 PDF of the downloaded images.

    Must run inside a Flask application context for ``process()`` (it uses
    ``current_app`` config/logging and ``url_for``); the download helpers
    degrade gracefully outside a context.
    """

    def __init__(self, data=None):
        # data: parsed JSON payload (dict) or None until assigned by the caller.
        self.data = data

    def validate(self):
        """Validate ``self.data`` against the v3.0 schema.

        Returns:
            True when the payload conforms.

        Raises:
            ValueError: wrapping the jsonschema ValidationError message,
                so callers deal with a single exception type.
        """
        try:
            validate(instance=self.data, schema=JSON_V3_SCHEMA)
            return True
        except ValidationError as e:
            raise ValueError(f"Schema validation failed: {e.message}")

    def download_image_from_url(self, url, save_path, timeout=30):
        """Download one image from ``url`` and write it to ``save_path``.

        Returns:
            save_path on success, None on failure. Never raises: network
            errors AND disk-write errors are swallowed and logged so a
            single bad image cannot crash the parallel batch.
        """
        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            with open(save_path, 'wb') as f:
                f.write(response.content)
            return save_path
        except (requests.exceptions.RequestException, OSError) as e:
            # OSError covers disk-write failures so they follow the same
            # "log and return None" path as network failures, instead of
            # escaping the worker thread.
            print(f"Error downloading image from {url}: {e}")  # Keep print for tests
            if current_app:  # LocalProxy is falsy outside an app context
                current_app.logger.error(f"Error downloading image from {url}: {e}")
            return None

    def download_images_parallel(self, questions, output_dir, session_id, max_workers=10):
        """Download all question images concurrently.

        Questions without an ``image_url`` are skipped. Filenames are
        namespaced by ``session_id`` to avoid collisions between sessions
        sharing ``output_dir``.

        Returns:
            dict mapping question_number -> local file path (only for
            downloads that succeeded).
        """
        image_paths = {}
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_question = {
                executor.submit(
                    self.download_image_from_url,
                    q['image_url'],
                    os.path.join(output_dir, f"{session_id}_q_{q['question_number']}.png")
                ): q for q in questions if q.get('image_url')
            }
            for future in as_completed(future_to_question):
                question = future_to_question[future]
                url = question['image_url']
                try:
                    path = future.result()
                    if path:
                        image_paths[question['question_number']] = path
                        # Guarded like download_image_from_url so this helper
                        # is also usable outside an app context (e.g. tests).
                        if current_app:
                            current_app.logger.info(f"Successfully downloaded image from {url}")
                    else:
                        if current_app:
                            current_app.logger.error(f"Failed to download image from {url}")
                except Exception as e:
                    # Defensive: download_image_from_url shouldn't raise, but a
                    # failure here must not abort the remaining futures.
                    if current_app:
                        current_app.logger.error(f"Error processing image for question {question.get('question_number')} from {url}: {e}")
        return image_paths

    def process(self, user_id=1):  # Default user_id for now, replace with actual user
        """Main processing logic for the v3.0 payload, including DB insertion and PDF generation.

        Args:
            user_id: owner recorded on the session and any generated PDF.

        Returns:
            dict with ``status``/``message`` plus either ``view_url``
            (auto-generated PDF) or ``edit_url`` (manual review session).

        Raises:
            ValueError: on validation failure, DB errors, or any unexpected
                error (all normalized to ValueError for the endpoint layer).
        """
        if not self.data:
            raise ValueError("No data provided to process.")
        current_app.logger.info("Starting processing of JSON v3.0 payload.")
        current_app.logger.info(f"Test Name: {self.data.get('test_name')}")
        current_app.logger.info(f"Test ID: {self.data.get('test_id')}")
        current_app.logger.info(f"Metadata: {self.data.get('metadata')}")
        # validate() raises ValueError on failure (it never returns False),
        # so a bare call is sufficient — no dead "if not ..." branch needed.
        self.validate()
        conn = get_db_connection()
        try:
            test_name = self.data['test_name']
            test_id = self.data['test_id']
            test_mapping_id = self.data['test_mapping_id']
            questions_payload = self.data['questions']
            view_mode = self.data.get('view', False)
            metadata = json.dumps(self.data.get('metadata', {}))  # Store metadata as JSON string
            config = self.data.get('config', {})
            layout = config.get('layout', {})
            # Layout defaults used only if config slipped past the schema.
            images_per_page = layout.get('images_per_page', 4)
            orientation = layout.get('orientation', 'portrait')
            session_id = str(uuid.uuid4())
            original_filename = f"{test_name}.json"  # Name of the JSON file that was uploaded
            conn.execute(
                'INSERT INTO sessions (id, original_filename, user_id, test_id, test_mapping_id, source, metadata) VALUES (?, ?, ?, ?, ?, ?, ?)',
                (session_id, original_filename, user_id, test_id, test_mapping_id, self.data.get('source', 'manual'), metadata)
            )
            processed_folder = current_app.config.get('PROCESSED_FOLDER', 'processed')
            os.makedirs(processed_folder, exist_ok=True)
            current_app.logger.info(f"Downloading images for test {test_id} to {processed_folder}")
            image_path_map = self.download_images_parallel(questions_payload, processed_folder, session_id)
            question_records = []
            for i, q_data in enumerate(questions_payload):
                question_number = q_data['question_number']
                # A missing map entry means the download failed (or there was
                # no image_url); the image row is still inserted so the
                # question keeps its ordinal slot.
                processed_filename = None
                local_image_path = image_path_map.get(question_number)
                if local_image_path:
                    processed_filename = os.path.basename(local_image_path)
                # Insert into images table (one at a time: we need lastrowid
                # as the FK for the matching questions row).
                image_insert_result = conn.execute(
                    'INSERT INTO images (session_id, image_index, filename, original_name, processed_filename, image_type) VALUES (?, ?, ?, ?, ?, ?)',
                    (session_id, i + 1, q_data.get('image_url', ''), f"Question {question_number}", processed_filename, 'cropped' if processed_filename else 'original_url_only')
                )
                image_id = image_insert_result.lastrowid
                # Queue the questions row for a single executemany below.
                question_records.append((
                    session_id, image_id, question_number, q_data['status'],
                    q_data['marked_solution'], q_data['correct_solution'],
                    q_data.get('subject'), q_data.get('chapter'), q_data.get('topic'), q_data.get('time_taken')
                ))
            conn.executemany(
                'INSERT INTO questions (session_id, image_id, question_number, status, marked_solution, actual_solution, subject, chapter, topic, time_taken) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                question_records
            )
            conn.commit()
            response_data = {
                "status": "success",
                "message": "JSON v3.0 processed successfully."
            }
            if view_mode:
                # Re-read from the DB (not the payload) so the PDF reflects
                # exactly what was persisted, in image-insertion order.
                query = "SELECT q.*, i.processed_filename FROM questions q JOIN images i ON q.image_id = i.id WHERE q.session_id = ? ORDER BY i.id"
                all_questions = [dict(row) for row in conn.execute(query, (session_id,)).fetchall()]
                if not all_questions:
                    conn.rollback()
                    raise ValueError('No questions found for PDF generation.')
                pdf_output_folder = current_app.config.get('OUTPUT_FOLDER', 'output')
                os.makedirs(pdf_output_folder, exist_ok=True)
                # secure_filename strips path separators from user-supplied
                # test_name; session prefix keeps filenames unique.
                pdf_filename = f"{secure_filename(test_name)}_{session_id[:8]}.pdf"
                create_a4_pdf_from_images(
                    image_info=all_questions, base_folder=processed_folder, output_filename=pdf_filename,
                    images_per_page=images_per_page, output_folder=pdf_output_folder,
                    orientation=orientation
                )
                conn.execute(
                    'INSERT INTO generated_pdfs (session_id, filename, subject, tags, notes, source_filename, user_id) VALUES (?, ?, ?, ?, ?, ?, ?)',
                    (session_id, pdf_filename, test_name, test_mapping_id, 'Generated automatically via JSON v3.0 upload.', original_filename, user_id)
                )
                conn.commit()
                response_data['view_url'] = url_for('main.view_pdf', filename=pdf_filename, _external=True)
                response_data['message'] = "PDF auto-generated and saved."
            else:
                response_data['edit_url'] = url_for('main.question_entry_v2', session_id=session_id, test_name=test_name, _external=True)
                response_data['message'] = "Session created for manual review."
            return response_data
        except ValueError as e:
            if conn:
                conn.rollback()
            current_app.logger.error(f"JSON v3.0 processing error: {e}")
            raise  # Re-raise to be caught by the endpoint
        except sqlite3.Error as e:
            if conn:
                conn.rollback()
            current_app.logger.error(f"Database error during JSON v3.0 processing: {e}")
            raise ValueError(f"Database error: {e}")
        except Exception as e:
            if conn:
                conn.rollback()
            current_app.logger.error(f"Unhandled error during JSON v3.0 processing: {e}")
            raise ValueError(f"An unexpected error occurred: {e}")
        finally:
            if conn:
                conn.close()
|