Spaces:
Running
Running
File size: 11,458 Bytes
92a22cd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 |
import json
import os
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from jsonschema import validate, ValidationError
import uuid
from flask import current_app, url_for
from werkzeug.utils import secure_filename
import sqlite3 # Import sqlite3
import sys
# Ensure current directory is in Python path for local imports
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from database import get_db_connection
from utils import create_a4_pdf_from_images
# JSON v3.0 Schema for validation.
# Consumed by jsonschema.validate() in JSONProcessorV3.validate(); any payload
# missing a key listed in a "required" array is rejected before DB insertion.
JSON_V3_SCHEMA = {
    "type": "object",
    "properties": {
        # Version is pinned: only payloads declaring exactly "3.0" pass.
        "version": {"type": "string", "const": "3.0"},
        "source": {"type": "string"},
        "test_name": {"type": "string"},
        "test_id": {"type": "string"},
        "test_mapping_id": {"type": "string"},
        # Free-form metadata object; stored as a JSON string in the sessions table.
        "metadata": {"type": "object"},
        "config": {
            "type": "object",
            "properties": {
                "statuses_to_include": {"type": "array", "items": {"type": "string"}},
                # Layout drives PDF generation (see create_a4_pdf_from_images call).
                "layout": {
                    "type": "object",
                    "properties": {
                        "images_per_page": {"type": "integer"},
                        "orientation": {"type": "string"}
                    },
                    "required": ["images_per_page", "orientation"]
                }
            },
            "required": ["statuses_to_include", "layout"]
        },
        "questions": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "question_number": {"type": "string"},
                    "image_url": {"type": "string", "format": "uri"},
                    "status": {"type": "string"},
                    "marked_solution": {"type": "string"},
                    "correct_solution": {"type": "string"},
                    # subject is required below, but chapter/topic stay optional.
                    "subject": {"type": "string"},
                    "chapter": {"type": "string"},
                    "topic": {"type": "string"},
                    "time_taken": {"type": "integer"}
                },
                "required": ["question_number", "image_url", "status", "marked_solution", "correct_solution", "subject", "time_taken"]
            }
        },
        # view=True triggers automatic PDF generation; False leaves the
        # session open for manual review.
        "view": {"type": "boolean"}
    },
    "required": ["version", "source", "test_name", "test_id", "test_mapping_id", "config", "questions", "view"]
}
class JSONProcessorV3:
    """Processor for JSON v3.0 test-result payloads.

    Pipeline: validate the payload against JSON_V3_SCHEMA, download all
    question images in parallel, persist a session plus per-question rows
    to SQLite, and — when the payload requests ``view`` mode — render an
    A4 PDF of the downloaded images.

    Must run inside a Flask application context for ``process()`` (it uses
    ``current_app`` config/logging and ``url_for``); the download helpers
    degrade gracefully outside a context.
    """

    def __init__(self, data=None):
        # data: parsed JSON payload (dict) or None until assigned by the caller.
        self.data = data

    def validate(self):
        """Validate ``self.data`` against the v3.0 schema.

        Returns:
            True when the payload conforms.

        Raises:
            ValueError: wrapping the jsonschema ValidationError message,
                so callers deal with a single exception type.
        """
        try:
            validate(instance=self.data, schema=JSON_V3_SCHEMA)
            return True
        except ValidationError as e:
            raise ValueError(f"Schema validation failed: {e.message}")

    def download_image_from_url(self, url, save_path, timeout=30):
        """Download one image from ``url`` and write it to ``save_path``.

        Returns:
            save_path on success, None on failure. Never raises: network
            errors AND disk-write errors are swallowed and logged so a
            single bad image cannot crash the parallel batch.
        """
        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            with open(save_path, 'wb') as f:
                f.write(response.content)
            return save_path
        except (requests.exceptions.RequestException, OSError) as e:
            # OSError covers disk-write failures so they follow the same
            # "log and return None" path as network failures, instead of
            # escaping the worker thread.
            print(f"Error downloading image from {url}: {e}")  # Keep print for tests
            if current_app:  # LocalProxy is falsy outside an app context
                current_app.logger.error(f"Error downloading image from {url}: {e}")
            return None

    def download_images_parallel(self, questions, output_dir, session_id, max_workers=10):
        """Download all question images concurrently.

        Questions without an ``image_url`` are skipped. Filenames are
        namespaced by ``session_id`` to avoid collisions between sessions
        sharing ``output_dir``.

        Returns:
            dict mapping question_number -> local file path (only for
            downloads that succeeded).
        """
        image_paths = {}
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_question = {
                executor.submit(
                    self.download_image_from_url,
                    q['image_url'],
                    os.path.join(output_dir, f"{session_id}_q_{q['question_number']}.png")
                ): q for q in questions if q.get('image_url')
            }
            for future in as_completed(future_to_question):
                question = future_to_question[future]
                url = question['image_url']
                try:
                    path = future.result()
                    if path:
                        image_paths[question['question_number']] = path
                        # Guarded like download_image_from_url so this helper
                        # is also usable outside an app context (e.g. tests).
                        if current_app:
                            current_app.logger.info(f"Successfully downloaded image from {url}")
                    else:
                        if current_app:
                            current_app.logger.error(f"Failed to download image from {url}")
                except Exception as e:
                    # Defensive: download_image_from_url shouldn't raise, but a
                    # failure here must not abort the remaining futures.
                    if current_app:
                        current_app.logger.error(f"Error processing image for question {question.get('question_number')} from {url}: {e}")
        return image_paths

    def process(self, user_id=1):  # Default user_id for now, replace with actual user
        """Main processing logic for the v3.0 payload, including DB insertion and PDF generation.

        Args:
            user_id: owner recorded on the session and any generated PDF.

        Returns:
            dict with ``status``/``message`` plus either ``view_url``
            (auto-generated PDF) or ``edit_url`` (manual review session).

        Raises:
            ValueError: on validation failure, DB errors, or any unexpected
                error (all normalized to ValueError for the endpoint layer).
        """
        if not self.data:
            raise ValueError("No data provided to process.")
        current_app.logger.info("Starting processing of JSON v3.0 payload.")
        current_app.logger.info(f"Test Name: {self.data.get('test_name')}")
        current_app.logger.info(f"Test ID: {self.data.get('test_id')}")
        current_app.logger.info(f"Metadata: {self.data.get('metadata')}")
        # validate() raises ValueError on failure (it never returns False),
        # so a bare call is sufficient — no dead "if not ..." branch needed.
        self.validate()
        conn = get_db_connection()
        try:
            test_name = self.data['test_name']
            test_id = self.data['test_id']
            test_mapping_id = self.data['test_mapping_id']
            questions_payload = self.data['questions']
            view_mode = self.data.get('view', False)
            metadata = json.dumps(self.data.get('metadata', {}))  # Store metadata as JSON string
            config = self.data.get('config', {})
            layout = config.get('layout', {})
            # Layout defaults used only if config slipped past the schema.
            images_per_page = layout.get('images_per_page', 4)
            orientation = layout.get('orientation', 'portrait')
            session_id = str(uuid.uuid4())
            original_filename = f"{test_name}.json"  # Name of the JSON file that was uploaded
            conn.execute(
                'INSERT INTO sessions (id, original_filename, user_id, test_id, test_mapping_id, source, metadata) VALUES (?, ?, ?, ?, ?, ?, ?)',
                (session_id, original_filename, user_id, test_id, test_mapping_id, self.data.get('source', 'manual'), metadata)
            )
            processed_folder = current_app.config.get('PROCESSED_FOLDER', 'processed')
            os.makedirs(processed_folder, exist_ok=True)
            current_app.logger.info(f"Downloading images for test {test_id} to {processed_folder}")
            image_path_map = self.download_images_parallel(questions_payload, processed_folder, session_id)
            question_records = []
            for i, q_data in enumerate(questions_payload):
                question_number = q_data['question_number']
                # A missing map entry means the download failed (or there was
                # no image_url); the image row is still inserted so the
                # question keeps its ordinal slot.
                processed_filename = None
                local_image_path = image_path_map.get(question_number)
                if local_image_path:
                    processed_filename = os.path.basename(local_image_path)
                # Insert into images table (one at a time: we need lastrowid
                # as the FK for the matching questions row).
                image_insert_result = conn.execute(
                    'INSERT INTO images (session_id, image_index, filename, original_name, processed_filename, image_type) VALUES (?, ?, ?, ?, ?, ?)',
                    (session_id, i + 1, q_data.get('image_url', ''), f"Question {question_number}", processed_filename, 'cropped' if processed_filename else 'original_url_only')
                )
                image_id = image_insert_result.lastrowid
                # Queue the questions row for a single executemany below.
                question_records.append((
                    session_id, image_id, question_number, q_data['status'],
                    q_data['marked_solution'], q_data['correct_solution'],
                    q_data.get('subject'), q_data.get('chapter'), q_data.get('topic'), q_data.get('time_taken')
                ))
            conn.executemany(
                'INSERT INTO questions (session_id, image_id, question_number, status, marked_solution, actual_solution, subject, chapter, topic, time_taken) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                question_records
            )
            conn.commit()
            response_data = {
                "status": "success",
                "message": "JSON v3.0 processed successfully."
            }
            if view_mode:
                # Re-read from the DB (not the payload) so the PDF reflects
                # exactly what was persisted, in image-insertion order.
                query = "SELECT q.*, i.processed_filename FROM questions q JOIN images i ON q.image_id = i.id WHERE q.session_id = ? ORDER BY i.id"
                all_questions = [dict(row) for row in conn.execute(query, (session_id,)).fetchall()]
                if not all_questions:
                    conn.rollback()
                    raise ValueError('No questions found for PDF generation.')
                pdf_output_folder = current_app.config.get('OUTPUT_FOLDER', 'output')
                os.makedirs(pdf_output_folder, exist_ok=True)
                # secure_filename strips path separators from user-supplied
                # test_name; session prefix keeps filenames unique.
                pdf_filename = f"{secure_filename(test_name)}_{session_id[:8]}.pdf"
                create_a4_pdf_from_images(
                    image_info=all_questions, base_folder=processed_folder, output_filename=pdf_filename,
                    images_per_page=images_per_page, output_folder=pdf_output_folder,
                    orientation=orientation
                )
                conn.execute(
                    'INSERT INTO generated_pdfs (session_id, filename, subject, tags, notes, source_filename, user_id) VALUES (?, ?, ?, ?, ?, ?, ?)',
                    (session_id, pdf_filename, test_name, test_mapping_id, 'Generated automatically via JSON v3.0 upload.', original_filename, user_id)
                )
                conn.commit()
                response_data['view_url'] = url_for('main.view_pdf', filename=pdf_filename, _external=True)
                response_data['message'] = "PDF auto-generated and saved."
            else:
                response_data['edit_url'] = url_for('main.question_entry_v2', session_id=session_id, test_name=test_name, _external=True)
                response_data['message'] = "Session created for manual review."
            return response_data
        except ValueError as e:
            if conn:
                conn.rollback()
            current_app.logger.error(f"JSON v3.0 processing error: {e}")
            raise  # Re-raise to be caught by the endpoint
        except sqlite3.Error as e:
            if conn:
                conn.rollback()
            current_app.logger.error(f"Database error during JSON v3.0 processing: {e}")
            raise ValueError(f"Database error: {e}")
        except Exception as e:
            if conn:
                conn.rollback()
            current_app.logger.error(f"Unhandled error during JSON v3.0 processing: {e}")
            raise ValueError(f"An unexpected error occurred: {e}")
        finally:
            if conn:
                conn.close()
|