diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..73f4a553238636b5249133fb058d2cde004fdfd2 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,16 @@ +data_repo/ +output/ +processed/ +uploads/ +database.db +backup.zip +venv/ +__pycache__/ +*.pyc +.git/ +.gitignore +.idea/ +.claude/ +backups/ +tmp/ +*.log diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..889c0aad8fb2d27595c699516767b47c404aadff 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +arial.ttf filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..1ca50296e8c6be54967c7cc18fb2949ed2a15f9f --- /dev/null +++ b/.gitignore @@ -0,0 +1,21 @@ +.env +.next/ +tmp/ +__pycache__/ +backup/ +bak2/ +*.zip +changes_db.txt +database.db +output/ +processed/ +templates/json_upload.html.bak +uploads/ +venv/ +x +{% +client_secret.json +backups/ +nohup.out +request.json +data_repo/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..6720d2d90ede64003b976f5e1069d2975261a870 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,40 @@ +FROM python:3.11-slim + +# Install system dependencies for OpenCV and HF +RUN apt-get update && apt-get install -y \ + git \ + git-lfs \ + curl \ + libgl1-mesa-glx \ + libglib2.0-0 \ + && rm -rf /var/lib/apt/lists/* + +# Install production dependencies +RUN pip install --no-cache-dir huggingface_hub[cli] gunicorn eventlet + +WORKDIR /app + +# Copy requirements and install +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy the rest of the application +COPY . . 
+ +# Ensure scripts are executable +RUN chmod +x entrypoint.sh + +# HF Spaces default user is 1000 +RUN useradd -m -u 1000 user +RUN chown -R user:user /app +USER user +ENV HOME=/home/user +ENV PATH=/home/user/.local/bin:$PATH + +# Environment variables +ENV PORT=7680 +ENV PYTHONUNBUFFERED=1 + +EXPOSE 7680 + +CMD ["./entrypoint.sh"] diff --git a/GEMINI.md b/GEMINI.md new file mode 100644 index 0000000000000000000000000000000000000000..ae999a80830dc932d12477d9ebe414ad3d72478d --- /dev/null +++ b/GEMINI.md @@ -0,0 +1,91 @@ +# Report Generator Documentation + +This document provides an overview of the Report Generator application, its features, and how the code is structured. + +## Core Functionality + +The primary purpose of this application is to streamline the process of creating analysis reports from PDF documents or images. It is particularly useful for analyzing test papers or other documents containing questions. + +### Workflow + +1. **Upload:** The user can start by uploading either a single PDF file or multiple image files. + * If a PDF is uploaded, the application splits it into individual pages, which are treated as images. +2. **Cropping:** The user is then taken to a cropping interface where they can draw boxes around specific areas of interest on each page (e.g., individual questions). +3. **Data Entry:** After cropping, the user enters details for each cropped image, including: + * Question Number + * Status (Correct, Wrong, Unattempted) + * Marked Answer + * Correct Answer +4. **PDF Generation:** Finally, the user provides metadata for the report (Subject, Tags, Notes) and generates a consolidated PDF report. This report can be filtered to include all questions or only specific statuses (e.g., "Wrong Only"). + +### Key Features + +* **PDF & Image Upload:** Supports both PDF and multiple image uploads. +* **Multi-Box Cropping:** An intuitive interface to select multiple questions from a single page. 
+* **NVIDIA NIM OCR:** Optionally, the application can use the NVIDIA NIM OCR API to automatically extract question numbers from the cropped images, saving manual entry time. This requires setting the `NVIDIA_API_KEY` environment variable. +* **Session Management:** Each upload creates a session, which can be persisted to prevent automatic deletion. +* **PDF Management:** Generated PDFs are stored and can be managed through a dedicated PDF Manager. + +## PDF Management + +A key feature of this application is the ability to track and manage the final generated PDFs. + +* **Metadata:** Each generated PDF is stored with the following metadata: + * **Subject (Mandatory):** The main subject of the report. + * **Tags (Optional):** Comma-separated tags for easy filtering. + * **Notes (Optional):** A text area for additional details. + * **Source File:** The name of the original PDF or images used to create the report. + * **Creation Date:** The date and time the PDF was generated. +* **Persistence:** Like sessions, generated PDFs can be marked as "Persisted" to prevent them from being automatically deleted. +* **Auto-Deletion:** A cleanup job runs periodically to delete old, non-persisted session data and generated PDFs (defaulting to older than 1 day). +* **PDF Manager Dashboard:** A dedicated dashboard at `/pdf_manager` allows users to: + * View all generated PDFs. + * Search and filter PDFs by subject, tags, or notes. + * Download any generated PDF. + * Toggle the persistence status of a PDF. + * Manually delete a PDF. + +## Code Structure + +The application is built using Flask, a Python web framework. + +### Backend (`app.py`) + +This file contains the core logic of the application. + +* **Database Setup (`setup_database`):** Initializes the SQLite database and creates the necessary tables (`sessions`, `images`, `questions`, `generated_pdfs`). It also handles schema migrations, such as adding new columns. 
+* **Cleanup (`cleanup_old_data`):** Contains the logic for deleting old, non-persisted data. +* **Flask Routes:** + * `/` & `/v2`: Main landing pages for choosing upload type. + * `/upload_pdf` & `/upload_images`: Handle the file uploads and create new sessions. + * `/cropv2//`: Displays the cropping interface. + * `/process_crop_v2`: Processes the cropping data and saves the cropped images. + * `/question_entry_v2/`: The main data entry page. + * `/save_questions`: Saves the question data to the database. + * `/generate_pdf`: Generates the final PDF report and saves its metadata. + * `/dashboard`: Displays the session management dashboard. + * `/pdf_manager`: Displays the new PDF management dashboard. + * `/delete_session/` & `/toggle_persist/`: Handle session deletion and persistence. + * `/delete_generated_pdf/` & `/toggle_persist_generated_pdf/`: Handle generated PDF deletion and persistence. + * `/extract_question_number` & `/extract_all_question_numbers`: (Optional) Routes for the NVIDIA NIM OCR functionality. + +### Frontend (`templates/`) + +The frontend is built with HTML templates using the Jinja2 templating engine and Bootstrap for styling. + +* **`base.html`:** The base template that other templates extend. +* **`main.html`:** The main entry point, allowing users to choose between PDF and image upload. +* **`indexv2.html` & `image_upload.html`:** The upload forms. +* **`cropv2.html`:** The cropping interface. +* **`question_entry_v2.html`:** The form for entering question details and generating the final PDF. +* **`dashboard.html`:** The dashboard for managing upload sessions. +* **`pdf_manager.html`:** The new dashboard for managing the final generated PDFs. + +### Database (`database.db`) + +A SQLite database is used for data storage. + +* **`sessions`:** Stores information about each upload session. +* **`images`:** Stores information about each page/image, including original and cropped versions. 
+* **`questions`:** Stores the data for each question. +* **`generated_pdfs`:** Stores the metadata for each final generated PDF. diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..730f8078ec2676ffe63370d19fc79a6b0943675d --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 DocuPDF + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
\ No newline at end of file diff --git a/README.md b/README.md index 1418844acf3e8bfa46e87457ce3074acff0495e0..56e34c3866e2dcf19c220d3336d8c27c8f6c5ea0 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,164 @@ ---- -title: Report Generator -emoji: 📚 -colorFrom: green -colorTo: gray -sdk: docker -pinned: false ---- - -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +# DocuPDF: Smart Scanner & PDF Generator + +DocuPDF is a web-based utility that transforms images of documents—like test papers, notes, or book pages—into a polished, organized, and enhanced PDF. It provides powerful 8-point perspective crop, image enhancement tools, and detailed metadata entry, all within your browser. + +## ✨ Key Features + +### Dual Input Support +- **PDF Upload**: Upload a PDF document and extract individual pages as images +- **Image Upload**: Upload multiple image files directly (PNG, JPG, JPEG, GIF, BMP) + +### Advanced Cropping +- Powerful 8-point perspective correction tool +- Draw multiple crop boxes on a single page/image +- Touch-friendly UI with magnifying loupe for precision + +### Image Enhancement +- Fine-tune scans with adjustments for: + - Brightness + - Contrast + - Gamma + +### Metadata Management +- Add structured metadata to each question: + - Question Number + - Subject + - Status (Correct/Wrong/Unattempted) + - Marked Solution + - Actual Solution + - Time Taken + +### Smart Features +- Automatic question number extraction using NVIDIA NIM OCR (when API key is provided) +- Dashboard for session management +- Custom PDF generation with filtering options + +### Modern UI +- Responsive design that works on desktop and mobile +- Dark theme for comfortable extended use +- Keyboard shortcuts for power users + +## 🔧 Tech Stack + +- **Backend**: Flask (Python) +- **Image Processing**: OpenCV, Pillow, PyMuPDF +- **Frontend**: HTML5, Bootstrap 5, JavaScript +- **Database**: SQLite +- **OCR**: NVIDIA NIM API 
(optional) + +## ⚙️ Installation & Setup + +### Prerequisites +- Python 3.7+ +- pip package installer + +### Step-by-Step Guide + +1. **Clone the Repository** + ```bash + git clone YOUR_REPOSITORY_URL + cd Report-Generator + ``` + +2. **Create and Activate a Virtual Environment (Recommended)** + ```bash + # Create the environment + python -m venv venv + + # Activate it + # On Windows: + .\venv\Scripts\activate + # On macOS/Linux: + source venv/bin/activate + ``` + +3. **Install Dependencies** + ```bash + pip install -r requirements.txt + ``` + +4. **Run the Application** + ```bash + python app.py + ``` + +5. **Open in Browser** + Navigate to `http://127.0.0.1:1302` in your web browser + +### Environment Variables (Optional) + +To enable the automatic question number extraction feature, set the `NVIDIA_API_KEY` environment variable: + +```bash +# On Linux/macOS: +export NVIDIA_API_KEY="your-api-key-here" + +# On Windows: +set NVIDIA_API_KEY=your-api-key-here + +# Or create a .env file with: +NVIDIA_API_KEY=your-api-key-here +``` + +If you don't set this variable, the application will still work, but the automatic question number extraction feature will be disabled. + +## 📖 How to Use + +### Workflow Options + +1. **PDF Workflow**: + - Upload a PDF document + - Each page is converted to an image + - Crop and enhance individual pages + - Enter question details + - Generate final PDF + +2. 
**Image Workflow**: + - Upload multiple image files directly + - Crop and enhance individual images + - Enter question details + - Generate final PDF + +### Step 1: Choose Input Method +- Select either PDF upload or multiple image upload from the main page + +### Step 2: Crop & Enhance +- For each page/image, draw crop boxes around questions +- Use the sliders to adjust brightness, contrast, and gamma +- Save and continue to the next page/image + +### Step 3: Enter Details +- Fill in metadata for each extracted question +- Use productivity features like "Same Subject for All" +- Extract question numbers automatically (if NVIDIA API is configured) + +### Step 4: Generate & Download +- Choose PDF name and layout options +- Filter questions by status if needed +- Generate and download your final document + +## πŸ“ Project Structure + +``` +/ +β”œβ”€β”€ uploads/ # Stores original user uploads (temporary) +β”œβ”€β”€ processed/ # Stores cropped & enhanced images (temporary) +β”œβ”€β”€ output/ # Stores the final generated PDFs +β”œβ”€β”€ templates/ # Contains all HTML files +β”œβ”€β”€ app.py # The main Flask application logic +β”œβ”€β”€ strings.py # Route constants and string definitions +β”œβ”€β”€ requirements.txt # List of Python dependencies +└── README.md # This file +``` + +## πŸ§ͺ Testing + +Run the test suite to verify functionality: + +```bash +python test.py +``` + +## πŸ“„ License + +This project is licensed under the MIT License. \ No newline at end of file diff --git a/api_key_manager.py b/api_key_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..a72e9a1e782c35d68bb1c71448170de643ab9921 --- /dev/null +++ b/api_key_manager.py @@ -0,0 +1,265 @@ +""" +Unified API Key Manager with automatic failover and rotation. + +This module manages multiple API keys for each service and automatically +switches to backup keys when one fails due to rate limiting or errors. 
import os
import time
from typing import List, Dict, Optional, Tuple
from dataclasses import dataclass, field
from datetime import datetime, timedelta
import threading
import logging

logger = logging.getLogger(__name__)

# Number of consecutive failures after which a key is put on cooldown.
_FAILURES_BEFORE_BLOCK = 3

# Highest numbered suffix probed in the environment (BASE_1 .. BASE_10).
_MAX_NUMBERED_ENV_KEYS = 10


@dataclass
class APIKeyStatus:
    """Track usage statistics and cooldown state for a single API key.

    All timestamps are naive local times taken from ``datetime.now()``.
    """

    key: str
    service: str
    last_used: Optional[datetime] = None
    failure_count: int = 0  # consecutive failures since the last success
    last_failure: Optional[datetime] = None
    is_blocked: bool = False
    blocked_until: Optional[datetime] = None
    total_requests: int = 0
    successful_requests: int = 0

    def mark_success(self) -> None:
        """Record a successful call and clear any failure/cooldown state."""
        self.last_used = datetime.now()
        self.total_requests += 1
        self.successful_requests += 1
        self.failure_count = 0  # a success ends the consecutive-failure streak
        self.is_blocked = False
        self.blocked_until = None

    def mark_failure(self, block_duration_minutes: int = 5) -> None:
        """Record a failed call; block the key after repeated failures.

        Args:
            block_duration_minutes: Cooldown length applied once the key
                reaches three consecutive failures.
        """
        now = datetime.now()
        self.last_used = now
        self.last_failure = now
        self.total_requests += 1
        self.failure_count += 1

        if self.failure_count >= _FAILURES_BEFORE_BLOCK:
            self.is_blocked = True
            self.blocked_until = now + timedelta(minutes=block_duration_minutes)
            logger.warning(
                "API key for %s blocked until %s after %s failures",
                self.service, self.blocked_until, self.failure_count,
            )

    def is_available(self) -> bool:
        """Return True if the key may be used right now.

        Note: this is not a pure query. When the cooldown has expired it
        unblocks the key and resets ``failure_count`` as a side effect.
        """
        if not self.is_blocked:
            return True

        if self.blocked_until and datetime.now() > self.blocked_until:
            self.is_blocked = False
            self.blocked_until = None
            self.failure_count = 0
            logger.info("API key for %s unblocked after cooldown period", self.service)
            return True

        return False

    def get_success_rate(self) -> float:
        """Return the success rate as a percentage (100.0 when unused)."""
        if self.total_requests == 0:
            return 100.0
        return (self.successful_requests / self.total_requests) * 100


class APIKeyManager:
    """Manage several API keys per service with automatic failover.

    Keys are handed out round-robin; a key that fails three times in a row
    is skipped until its cooldown expires. All public methods take
    ``self.lock``, which is a non-reentrant ``threading.Lock``.
    """

    def __init__(self):
        self.keys: Dict[str, List[APIKeyStatus]] = {}
        self.current_index: Dict[str, int] = {}
        self.lock = threading.Lock()
        self._load_keys_from_env()

    def _load_keys_from_env(self) -> None:
        """Register the 'nvidia', 'gemini' and 'openrouter' services from the environment."""
        nvidia_keys = self._get_keys_from_env('NVIDIA_API_KEY')
        if nvidia_keys:
            self.register_service('nvidia', nvidia_keys)

        # Gemini accepts keys published under either variable family.
        gemini_keys = self._get_keys_from_env('GEMINI_API_KEY')
        google_keys = self._get_keys_from_env('GOOGLE_API_KEY')
        all_gemini_keys = gemini_keys + google_keys
        if all_gemini_keys:
            self.register_service('gemini', all_gemini_keys)

        # OpenRouter API keys (for Nova)
        openrouter_keys = self._get_keys_from_env('OPENROUTER_API_KEY')
        if openrouter_keys:
            self.register_service('openrouter', openrouter_keys)

        logger.info(
            "Loaded API keys: NVIDIA=%s, Gemini=%s, OpenRouter=%s",
            len(nvidia_keys), len(all_gemini_keys), len(openrouter_keys),
        )

    def _get_keys_from_env(self, base_name: str) -> List[str]:
        """Collect the API keys published under ``base_name`` in the environment.

        Lookup order:
            1. BASE_NAME (index 0)
            2. BASE_NAME_1 ... BASE_NAME_10 (indices 1, 2, 3, ...)

        Example:
            - GEMINI_API_KEY   -> index 0
            - GEMINI_API_KEY_1 -> index 1
            - GEMINI_API_KEY_2 -> index 2

        Values are stripped of surrounding whitespace, empty values are
        skipped, and duplicates are removed while preserving order.
        """
        names = [base_name]
        names.extend(f"{base_name}_{i}" for i in range(1, _MAX_NUMBERED_ENV_KEYS + 1))
        values = (os.environ.get(name, "").strip() for name in names)
        # dict.fromkeys de-duplicates while keeping first-seen order.
        return list(dict.fromkeys(value for value in values if value))

    def register_service(self, service: str, api_keys: List[str]) -> None:
        """Register (or replace) the key list for ``service`` and reset its cursor."""
        with self.lock:
            self.keys[service] = [
                APIKeyStatus(key=key, service=service)
                for key in api_keys
            ]
            self.current_index[service] = 0
            logger.info("Registered %s API key(s) for service: %s", len(api_keys), service)

    def get_key(self, service: str) -> Tuple[Optional[str], int]:
        """Return an available key for ``service``.

        Returns:
            ``(api_key, key_index)``, or ``(None, -1)`` when the service is
            unknown or every key is currently blocked.
        """
        with self.lock:
            service_keys = self.keys.get(service)
            if not service_keys:
                logger.warning("No API keys registered for service: %s", service)
                return None, -1

            start_index = self.current_index[service]
            total = len(service_keys)

            # Probe every key once, starting from the round-robin cursor.
            for offset in range(total):
                idx = (start_index + offset) % total
                if service_keys[idx].is_available():
                    self.current_index[service] = idx
                    logger.debug("Using API key %s/%s for %s", idx + 1, total, service)
                    return service_keys[idx].key, idx

            logger.error("All API keys for %s are currently blocked or unavailable", service)
            return None, -1

    def mark_success(self, service: str, key_index: int) -> None:
        """Record a successful call; out-of-range indices are ignored."""
        with self.lock:
            service_keys = self.keys.get(service, [])
            if 0 <= key_index < len(service_keys):
                service_keys[key_index].mark_success()
                logger.debug("API key %s for %s marked as successful", key_index + 1, service)

                # Advance the cursor so load is spread round-robin.
                self.current_index[service] = (key_index + 1) % len(service_keys)

    def mark_failure(self, service: str, key_index: int, block_duration_minutes: int = 5) -> None:
        """Record a failed call (possibly blocking the key); out-of-range indices are ignored."""
        with self.lock:
            service_keys = self.keys.get(service, [])
            if 0 <= key_index < len(service_keys):
                service_keys[key_index].mark_failure(block_duration_minutes)
                logger.warning("API key %s for %s marked as failed", key_index + 1, service)

                # Move on immediately so the next request tries another key.
                self.current_index[service] = (key_index + 1) % len(service_keys)

    def get_service_status(self, service: str) -> Dict:
        """Return a JSON-serialisable status summary for ``service``.

        The result always has the same keys, including an (possibly empty)
        ``'keys'`` list, whether or not the service is registered. The key
        values themselves are never included.
        """
        with self.lock:
            if service not in self.keys:
                return {
                    'service': service,
                    'available': False,
                    'total_keys': 0,
                    'available_keys': 0,
                    'blocked_keys': 0,
                    'keys': [],
                }

            service_keys = self.keys[service]
            # is_available() expires finished cooldowns, so run it before
            # counting blocked keys.
            available_keys = sum(1 for k in service_keys if k.is_available())
            blocked_keys = sum(1 for k in service_keys if k.is_blocked)

            return {
                'service': service,
                'available': available_keys > 0,
                'total_keys': len(service_keys),
                'available_keys': available_keys,
                'blocked_keys': blocked_keys,
                'keys': [
                    {
                        'index': i,
                        'is_available': k.is_available(),
                        'is_blocked': k.is_blocked,
                        'failure_count': k.failure_count,
                        'total_requests': k.total_requests,
                        'success_rate': round(k.get_success_rate(), 2),
                        'blocked_until': k.blocked_until.isoformat() if k.blocked_until else None
                    }
                    for i, k in enumerate(service_keys)
                ]
            }

    def get_all_services_status(self) -> Dict[str, Dict]:
        """Return ``get_service_status`` for every registered service."""
        # Snapshot the names under the lock so a concurrent register_service
        # cannot change the dict while it is being iterated. The lock is
        # released before get_service_status re-acquires it (non-reentrant).
        with self.lock:
            services = list(self.keys)
        return {service: self.get_service_status(service) for service in services}

    def reset_service(self, service: str) -> None:
        """Unblock every key of ``service`` and clear its failure streak.

        Request counters (``total_requests`` / ``successful_requests``) are
        left untouched.
        """
        with self.lock:
            if service in self.keys:
                for key_status in self.keys[service]:
                    key_status.is_blocked = False
                    key_status.blocked_until = None
                    key_status.failure_count = 0
                logger.info("Reset all keys for service: %s", service)


# Global singleton instance, created lazily by get_api_key_manager().
_api_key_manager: Optional[APIKeyManager] = None
_api_key_manager_lock = threading.Lock()


def get_api_key_manager() -> APIKeyManager:
    """Return the process-wide APIKeyManager, creating it on first use."""
    global _api_key_manager
    if _api_key_manager is None:
        # Re-check under the lock so two threads racing on first use do not
        # build two managers with separate failure statistics.
        with _api_key_manager_lock:
            if _api_key_manager is None:
                _api_key_manager = APIKeyManager()
    return _api_key_manager
def humanize_datetime(dt_str):
    """Convert a timestamp into a short, human-friendly date label.

    Args:
        dt_str: An ISO-8601 style string (fractional seconds are ignored),
            or a ``datetime``/``date`` object.

    Returns:
        ``""`` for falsy input, ``"Today"`` / ``"Yesterday"`` for those two
        days, otherwise a date such as ``"Jan 05, 2020"``. Input that
        cannot be interpreted is returned unchanged, so a template never
        fails because of a malformed value.
    """
    if not dt_str:
        return ""
    try:
        if isinstance(dt_str, datetime):  # check first: datetime is a date subclass
            day = dt_str.date()
        elif isinstance(dt_str, date):
            day = dt_str
        else:
            # Drop everything after the decimal point so microseconds do
            # not have to be parsed.
            day = datetime.fromisoformat(dt_str.split('.')[0]).date()
    except (ValueError, TypeError, AttributeError):
        return dt_str  # Return the original value if parsing fails

    days_ago = (date.today() - day).days
    if days_ago == 0:
        return "Today"
    if days_ago == 1:
        return "Yesterday"
    return day.strftime('%b %d, %Y')


def create_app():
    """Build and configure the Flask application.

    Sets up CORS and Socket.IO, registers the Jinja filters, applies
    configuration, creates the working folders, initialises the database
    and the login manager, and registers every blueprint.

    Returns:
        The configured ``Flask`` instance.
    """
    app = Flask(__name__)
    # NOTE(review): CORS and Socket.IO accept every origin here; confirm
    # that this is intended for the deployment.
    CORS(app)
    socketio.init_app(app, cors_allowed_origins="*")

    # Register custom Jinja2 filters
    app.jinja_env.filters['humanize'] = humanize_datetime
    app.jinja_env.filters['chr'] = chr

    # Configuration
    # Prefer a stable key from the environment: a key generated at start-up
    # invalidates all sessions on restart and differs between worker
    # processes. The random key remains the fallback when none is set.
    app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY') or os.urandom(24)
    app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 * 4096  # 64 GiB
    app.config['UPLOAD_FOLDER'] = 'uploads'
    app.config['PROCESSED_FOLDER'] = 'processed'
    app.config['OUTPUT_FOLDER'] = 'output'
    app.config['TEMP_FOLDER'] = 'tmp'

    # Ensure the working folders exist
    for config_key in ('UPLOAD_FOLDER', 'PROCESSED_FOLDER', 'OUTPUT_FOLDER', 'TEMP_FOLDER'):
        os.makedirs(app.config[config_key], exist_ok=True)

    with app.app_context():
        setup_database()

    # Setup Login Manager
    from user_auth import setup_login_manager
    setup_login_manager(app)

    # Blueprints are imported inside the factory rather than at module
    # import time, as in the original code.
    from routes import main_bp
    from json_processor import json_bp
    from neetprep import neetprep_bp
    from classifier_routes import classifier_bp
    from dashboard import dashboard_bp
    from image_routes import image_bp
    from auth_routes import auth_bp
    from settings_routes import settings_bp
    from subjective_routes import subjective_bp
    from camera_routes import camera_bp
    from drive_routes import drive_bp
    from qtab_routes import qtab_bp

    blueprints = (
        main_bp,
        json_bp,
        neetprep_bp,
        classifier_bp,
        dashboard_bp,
        image_bp,
        auth_bp,
        settings_bp,
        subjective_bp,
        camera_bp,
        drive_bp,
        qtab_bp,
    )
    for blueprint in blueprints:
        app.register_blueprint(blueprint)

    return app
@auth_bp.route('/login', methods=['GET', 'POST'])
def login():
    """Render the login form (GET) or authenticate the submitted user (POST).

    On a successful POST the user is logged in and redirected to the
    ``next`` form value when it is a same-site path, otherwise to the
    dashboard. On failure a flash message is set and the login page is
    shown again.
    """
    if request.method == 'POST':
        username = request.form.get('username')
        # Fall back to '' so a missing field fails the hash check instead
        # of raising inside check_password_hash.
        password = request.form.get('password') or ''
        remember = bool(request.form.get('remember'))

        user = User.get_by_username(username)

        if not user or not check_password_hash(user.password_hash, password):
            flash('Please check your login details and try again.')
            return redirect(url_for('auth.login'))

        login_user(user, remember=remember)

        next_page = request.form.get('next')
        if next_page:
            target = urlparse(next_page)
            # Security: only redirect to local paths. An empty netloc is
            # not sufficient: browsers resolve "///host" and "/\host" as
            # off-site, and strip tabs/newlines before resolving.
            is_local_path = (
                not target.scheme
                and not target.netloc
                and next_page.startswith('/')
                and not next_page.startswith('//')
                and not any(ch in next_page for ch in '\\\t\r\n')
            )
            if is_local_path:
                return redirect(next_page)

        return redirect(url_for('dashboard.dashboard'))

    return render_template('login.html')
def backup_database_and_files(db_path='database.db', backup_dir='backup', zip_filename='backup.zip'):
    """
    Exports all tables from the SQLite database to JSON files, backs up associated files,
    and creates a zip archive of the backup.

    Any existing ``backup_dir`` is deleted first; the directory is used as a
    staging area and removed again once the archive is written. If the
    database cannot be read, the staging directory is removed and no archive
    is produced.

    :param db_path: Path to the SQLite database file.
    :param backup_dir: Directory to save the backup.
    :param zip_filename: Name of the output zip file.
    """
    if os.path.exists(backup_dir):
        shutil.rmtree(backup_dir)
    os.makedirs(backup_dir)

    # 1. Backup the database to JSON files
    conn = None
    try:
        conn = sqlite3.connect(db_path)
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()

        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = [row[0] for row in cursor.fetchall()]

        for table_name in tables:
            print(f"Backing up table: {table_name}")
            # Identifiers cannot be bound as parameters, so quote the name;
            # otherwise tables with spaces or keyword names break the query.
            quoted_name = '"' + table_name.replace('"', '""') + '"'
            cursor.execute(f"SELECT * FROM {quoted_name}")
            data = [dict(row) for row in cursor.fetchall()]

            backup_file_path = os.path.join(backup_dir, f"{table_name}.json")
            with open(backup_file_path, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=4)

            print(f"Successfully backed up {table_name} to {backup_file_path}")

    except sqlite3.Error as e:
        print(f"Database error: {e}")
        # Do not leave a half-written staging directory behind.
        shutil.rmtree(backup_dir, ignore_errors=True)
        return
    finally:
        if conn:
            conn.close()

    # 2. Backup associated files
    file_dirs_to_backup = ['output', 'processed', 'uploads']
    for dir_name in file_dirs_to_backup:
        source_dir = dir_name
        dest_dir = os.path.join(backup_dir, dir_name)

        if os.path.exists(source_dir):
            print(f"Backing up directory: {source_dir}")
            shutil.copytree(source_dir, dest_dir)
            print(f"Successfully backed up {source_dir} to {dest_dir}")
        else:
            print(f"Directory not found, skipping: {source_dir}")

    # 3. Create a zip archive of the backup directory
    print(f"\nCreating zip archive: {zip_filename}")
    with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _dirs, files in os.walk(backup_dir):
            for file in files:
                file_path = os.path.join(root, file)
                # Store paths relative to the staging dir, not the cwd
                arcname = os.path.relpath(file_path, backup_dir)
                zipf.write(file_path, arcname)

    print(f"Successfully created {zip_filename}")

    # 4. Clean up the backup directory
    shutil.rmtree(backup_dir)
    print(f"Cleaned up backup directory: {backup_dir}")

    print("\nBackup complete!")
@camera_bp.route('/camera/upload_captured_image', methods=['POST'])
@login_required
def upload_captured_image():
    """Store an image captured by the camera UI and open a session for it.

    Expects a multipart form with an ``image`` file. The file is written to
    the ``tmp`` folder under the current working directory, and one row is
    inserted into each of ``sessions`` and ``images``.

    Returns:
        JSON ``{'success': True, 'session_id', 'filename', 'original_name'}``
        on success; a JSON error with status 400 for a missing or empty
        upload, or 500 when the database insert fails.
    """
    if 'image' not in request.files:
        return jsonify({'error': 'No image file provided'}), 400

    file = request.files['image']
    if file.filename == '':
        return jsonify({'error': 'No selected file'}), 400

    if not file:
        return jsonify({'error': 'Image capture failed'}), 500

    session_id = str(uuid.uuid4())
    original_filename = secure_filename(file.filename) or f"captured_image_{session_id}.png"
    # The client-supplied name is not unique (captures often share one
    # name), so prefix it with the session id to avoid overwriting files.
    stored_filename = f"{session_id}_{original_filename}"

    # Using tmp folder relative to CWD; create it if the app has not yet.
    tmp_dir = os.path.join(os.getcwd(), 'tmp')
    os.makedirs(tmp_dir, exist_ok=True)
    save_path = os.path.join(tmp_dir, stored_filename)
    file.save(save_path)

    conn = get_db_connection()
    try:
        conn.execute(
            'INSERT INTO sessions (id, original_filename, name, user_id, session_type) VALUES (?, ?, ?, ?, ?)',
            (session_id, original_filename, original_filename, current_user.id, 'image_capture')
        )
        # Insert the image into the images table
        conn.execute(
            'INSERT INTO images (session_id, image_index, filename, original_name, image_type) VALUES (?, ?, ?, ?, ?)',
            (session_id, 0, stored_filename, original_filename, 'original')
        )
        conn.commit()
    except Exception as e:
        conn.rollback()
        # No DB row references the file, so do not leave it orphaned.
        try:
            os.remove(save_path)
        except OSError:
            pass
        return jsonify({'error': f'Database error: {str(e)}'}), 500
    finally:
        conn.close()

    return jsonify({
        'success': True,
        'session_id': session_id,
        'filename': stored_filename,
        'original_name': original_filename,
    })
@classifier_bp.route('/classified/edit')
@login_required
def edit_classified_questions():
    """Renders the page for editing classified questions.

    Loads the current user's questions that have both a subject and a
    chapter, plus the user's distinct chapters and tags for autocomplete,
    and passes them to ``classified_edit.html``.
    """
    AVAILABLE_SUBJECTS = ["Biology", "Chemistry", "Physics", "Mathematics"]

    conn = get_db_connection()
    try:
        # Security: Fetch questions belonging to the current user
        questions_from_db = conn.execute("""
            SELECT q.id, q.question_text, q.chapter, q.subject, q.tags
            FROM questions q
            JOIN sessions s ON q.session_id = s.id
            WHERE s.user_id = ? AND q.subject IS NOT NULL AND q.chapter IS NOT NULL
            ORDER BY q.id
        """, (current_user.id,)).fetchall()

        # Suggestions should also be user-specific
        chapters = conn.execute('SELECT DISTINCT q.chapter FROM questions q JOIN sessions s ON q.session_id = s.id WHERE s.user_id = ? AND q.chapter IS NOT NULL ORDER BY q.chapter', (current_user.id,)).fetchall()
        tags_query = conn.execute('SELECT DISTINCT q.tags FROM questions q JOIN sessions s ON q.session_id = s.id WHERE s.user_id = ? AND q.tags IS NOT NULL AND q.tags != \'\'', (current_user.id,)).fetchall()
    finally:
        # Close even when a query fails so the connection is not leaked.
        conn.close()

    questions = []
    for q in questions_from_db:
        q_dict = dict(q)
        # question_text stays NULL until OCR has run; treat it as empty
        # for the preview instead of failing on len(None).
        plain_text = q_dict['question_text'] or ''
        q_dict['question_text_plain'] = (plain_text[:100] + '...') if len(plain_text) > 100 else plain_text
        questions.append(q_dict)

    all_tags = set()
    for row in tags_query:
        # Skip blanks produced by stray commas such as "a,,b" or "a, "
        all_tags.update(tag.strip() for tag in row['tags'].split(',') if tag.strip())

    return render_template('classified_edit.html',
                           questions=questions,
                           chapters=[c['chapter'] for c in chapters],
                           all_tags=sorted(all_tags),
                           available_subjects=AVAILABLE_SUBJECTS)
@classifier_bp.route('/classified/delete_question/<int:question_id>', methods=['DELETE'])
@login_required
def delete_classified_question(question_id):
    """Handles deleting a classified question.

    The question row itself is kept; only its ``subject`` and ``chapter``
    are cleared, after checking that the question's session belongs to the
    current user.

    Args:
        question_id: ``questions.id`` taken from the URL.

    Returns:
        JSON ``{'success': True}``, or a JSON error with status 403 when
        the question is missing or not owned, or 500 on failure.
    """
    conn = None
    try:
        conn = get_db_connection()
        # Security: Check ownership before delete
        question_owner = conn.execute("SELECT s.user_id FROM questions q JOIN sessions s ON q.session_id = s.id WHERE q.id = ?", (question_id,)).fetchone()
        if not question_owner or question_owner['user_id'] != current_user.id:
            return jsonify({'error': 'Unauthorized'}), 403

        # Update the question to remove classification
        conn.execute('UPDATE questions SET subject = NULL, chapter = NULL WHERE id = ?', (question_id,))

        conn.commit()
        return jsonify({'success': True})
    except Exception as e:
        current_app.logger.error(f"Error deleting question {question_id}: {repr(e)}")
        return jsonify({'error': str(e)}), 500
    finally:
        # Single exit point for the connection, including the error path.
        if conn is not None:
            conn.close()
@classifier_bp.route('/extract_and_classify_all/<session_id>', methods=['POST'])
@login_required
def extract_and_classify_all(session_id):
    """OCR every cropped image of a session, then classify the texts in batches.

    Steps:
      1. Verify that the session belongs to the current user.
      2. Run OCR on each cropped image and store the text in ``questions``.
      3. Send the texts to the user's preferred classifier in batches and
         write the returned subject/chapter back to ``questions``.

    Args:
        session_id: ``sessions.id`` taken from the URL.

    Returns:
        JSON success message with counts, or a JSON error with status 403
        (not owner), 404 (no cropped images) or 500 (unexpected failure).
    """
    conn = None
    try:
        conn = get_db_connection()
        # Security: Check ownership of the session
        session_owner = conn.execute('SELECT user_id FROM sessions WHERE id = ?', (session_id,)).fetchone()
        if not session_owner or session_owner['user_id'] != current_user.id:
            return jsonify({'error': 'Unauthorized'}), 403

        images = conn.execute(
            "SELECT id, processed_filename FROM images WHERE session_id = ? AND image_type = 'cropped' ORDER BY id",
            (session_id,)
        ).fetchall()

        if not images:
            return jsonify({'error': 'No cropped images found in session'}), 404

        current_app.logger.info(f"Found {len(images)} images to process for user {current_user.id}.")

        # Parallel lists: question_texts[k] is the OCR text of image_ids[k].
        question_texts = []
        image_ids = []
        for image in images:
            image_id = image['id']
            processed_filename = image['processed_filename']

            if not processed_filename:
                continue

            image_path = os.path.join(current_app.config['PROCESSED_FOLDER'], processed_filename)
            if not os.path.exists(image_path):
                continue

            image_bytes = resize_image_if_needed(image_path)
            ocr_result = call_nim_ocr_api(image_bytes)

            current_app.logger.info(f"NVIDIA OCR Result for image {image_id}: {ocr_result}")

            if not ocr_result or not ocr_result.get('data') or not ocr_result['data'][0].get('text_detections'):
                current_app.logger.error(f"NVIDIA OCR result for image {image_id} does not contain 'text_detections' key. Full response: {ocr_result}")
                continue

            text = " ".join(item['text_prediction']['text'] for item in ocr_result['data'][0]['text_detections'])

            conn.execute('UPDATE questions SET question_text = ? WHERE image_id = ?', (text, image_id))
            current_app.logger.info(f"Updated question_text for image_id: {image_id}")
            question_texts.append(text)
            image_ids.append(image_id)

        conn.commit()

        # --- Batch Processing and Classification ---
        batch_size = 7  # Default batch size
        total_questions = len(question_texts)
        num_batches = (total_questions + batch_size - 1) // batch_size
        total_update_count = 0

        # Choose classifier based on user preference (same for every batch)
        classifier_model = getattr(current_user, 'classifier_model', 'gemini')

        for i in range(num_batches):
            start_index = i * batch_size
            end_index = start_index + batch_size

            batch_texts = question_texts[start_index:end_index]

            if not batch_texts:
                continue

            current_app.logger.info(f"Processing Batch {i+1}/{num_batches}...")

            if classifier_model == 'nova':
                current_app.logger.info(f"Using Nova classifier for user {current_user.id}")
                classification_result = classify_questions_with_nova(batch_texts, start_index=start_index)
                model_name = "Nova"
            elif classifier_model == 'gemma':
                current_app.logger.info(f"Using Gemma classifier for user {current_user.id}")
                classification_result = gemma_classifier.classify(batch_texts, start_index=start_index)
                model_name = "Gemma"
            else:
                current_app.logger.info(f"Using Gemini classifier for user {current_user.id}")
                classification_result = classify_questions_with_gemini(batch_texts, start_index=start_index)
                model_name = "Gemini"

            # Log the result to the terminal
            current_app.logger.info(f"--- Classification Result ({model_name}) for Batch {i+1} ---")
            current_app.logger.info(json.dumps(classification_result, indent=2))
            current_app.logger.info("---------------------------------------------")

            if not classification_result or not classification_result.get('data'):
                current_app.logger.error(f'{model_name} classifier did not return valid data for batch {i+1}.')
                continue  # Move to the next batch

            # --- Immediate DB Update for the Batch ---
            batch_update_count = 0
            for item in classification_result.get('data', []):
                item_index_global = item.get('index')  # 1-based index into the full list
                if item_index_global is None:
                    continue

                try:
                    position = int(item_index_global) - 1
                except (TypeError, ValueError):
                    current_app.logger.error(f"Classifier returned a non-numeric index: {item_index_global!r}")
                    continue

                # Explicit range check: an index of 0 or below would
                # otherwise wrap around and update the wrong question.
                if not 0 <= position < len(image_ids):
                    current_app.logger.error(f"Classifier returned an out-of-bounds index: {item_index_global}")
                    continue

                matched_id = image_ids[position]
                new_subject = item.get('subject')
                new_chapter = item.get('chapter_title')

                if new_subject and new_subject != 'Unclassified' and new_chapter and new_chapter != 'Unclassified':
                    conn.execute('UPDATE questions SET subject = ?, chapter = ? WHERE image_id = ?', (new_subject, new_chapter, matched_id))
                    batch_update_count += 1
                elif new_subject and new_subject != 'Unclassified':
                    # Subject known but chapter not: keep the subject.
                    conn.execute('UPDATE questions SET subject = ?, chapter = ? WHERE image_id = ?', (new_subject, 'Unclassified', matched_id))
                    batch_update_count += 1

            conn.commit()
            total_update_count += batch_update_count
            current_app.logger.info(f"Batch {i+1} processed. Updated {batch_update_count} questions in the database.")

            if i < num_batches - 1:
                # Pause between batches before calling the classifier again.
                current_app.logger.info("Waiting 5 seconds before next batch...")
                time.sleep(5)

        return jsonify({'success': True, 'message': f'Successfully extracted and classified {total_questions} questions. Updated {total_update_count} entries in the database.'})

    except Exception as e:
        current_app.logger.error(f'Failed to extract and classify questions: {str(e)}', exc_info=True)
        return jsonify({'error': f'Failed to extract and classify questions: {str(e)}'}), 500
    finally:
        # Close on every path, including early returns and errors.
        if conn is not None:
            conn.close()
AND subject IS NOT NULL AND chapter IS NOT NULL + LIMIT 1 + """, (session_id,)) + return cursor.fetchone() is not None + +def show_disk_usage_report(console): + """Calculates and displays a report of disk usage by category.""" + console.print("\n[bold cyan]Disk Usage Report[/bold cyan]") + + def sizeof_fmt(num, suffix="B"): + """Formats a size in bytes to a human-readable string.""" + for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]: + if abs(num) < 1024.0: + return f"{num:3.1f}{unit}{suffix}" + num /= 1024.0 + return f"{num:.1f}Y{suffix}" + + # --- Summary Report --- + usage_data = {} + folders_to_scan = { + "Uploaded Originals": UPLOAD_FOLDER, + "Processed Images": PROCESSED_FOLDER, + "Generated PDFs": OUTPUT_FOLDER, + } + + for category, folder in folders_to_scan.items(): + total_size = 0 + file_count = 0 + try: + for dirpath, _, filenames in os.walk(folder): + for f in filenames: + fp = os.path.join(dirpath, f) + if not os.path.islink(fp): + try: + total_size += os.path.getsize(fp) + file_count += 1 + except FileNotFoundError: + pass + except FileNotFoundError: + pass + usage_data[category] = {"size": total_size, "count": file_count} + + summary_table = Table(title="Disk Space Usage by Category") + summary_table.add_column("Category", style="cyan") + summary_table.add_column("File Count", style="magenta", justify="right") + summary_table.add_column("Total Size", style="green", justify="right") + + total_size_all = 0 + total_count_all = 0 + for category, data in usage_data.items(): + summary_table.add_row(category, str(data["count"]), sizeof_fmt(data["size"])) + total_size_all += data["size"] + total_count_all += data["count"] + + summary_table.add_section() + summary_table.add_row("Total", f"[bold]{total_count_all}[/bold]", f"[bold]{sizeof_fmt(total_size_all)}[/bold]") + + console.print(summary_table) + + # --- Detailed Breakdown for Uploaded Originals --- + console.print("\n[bold]Breakdown of 'Uploaded Originals':[/bold]") + + conn = get_db_connection() + 
def main():
    """Main function to identify and clean up old data.

    Prints the disk usage report, then lists sessions and generated PDFs
    older than ``OLDER_THAN_DAYS`` that are not protected (persisted,
    NeetPrep/JSON-derived, or classified). When ``DRY_RUN`` is false, the
    listed files and database rows are deleted.

    Rows whose ``created_at`` is missing or unparseable are never deleted.
    """
    from datetime import timezone  # only needed here; keeps file imports untouched

    console = Console()
    console.print(f"[bold cyan]Starting Cleanup Process...[/bold cyan]")
    console.print(f"Mode: [bold {'yellow' if DRY_RUN else 'red'}]{'DRY RUN' if DRY_RUN else 'DELETION ENABLED'}[/]")
    console.print(f"Looking for items older than {OLDER_THAN_DAYS} days.")

    show_disk_usage_report(console)

    # SQLite's CURRENT_TIMESTAMP is UTC, so compare against naive UTC "now"
    # rather than local time.
    now = datetime.now(timezone.utc).replace(tzinfo=None)
    cutoff_date = now - timedelta(days=OLDER_THAN_DAYS)

    def parse_created_at(value):
        """Return a naive-UTC datetime, or None when the value is unusable."""
        try:
            parsed = datetime.fromisoformat(value)
        except (TypeError, ValueError):
            return None
        if parsed.tzinfo is not None:
            parsed = parsed.astimezone(timezone.utc).replace(tzinfo=None)
        return parsed

    def looks_like_neetprep(name):
        """True when a filename marks NeetPrep/JSON-derived content."""
        return bool(name) and ('.json' in name.lower() or 'neetprep' in name.lower())

    conn = get_db_connection()
    try:
        # Each entry is (row, parsed created_at)
        sessions_to_delete = []
        pdfs_to_delete = []

        # --- 1. Identify Sessions to Delete ---
        all_sessions = conn.execute('SELECT id, created_at, original_filename, persist FROM sessions').fetchall()

        with console.status("[cyan]Analyzing sessions...[/cyan]") as status:
            for session in all_sessions:
                created_at = parse_created_at(session['created_at'])
                immune = (
                    created_at is None  # unknown age: never delete
                    or created_at > cutoff_date
                    or session['persist'] == 1
                    or looks_like_neetprep(session['original_filename'])
                    or is_classified_session(conn, session['id'])
                )
                if not immune:
                    sessions_to_delete.append((session, created_at))
            status.update(f"[cyan]Analyzed {len(all_sessions)} sessions. Found {len(sessions_to_delete)} candidates for deletion.[/cyan]")

        # --- 2. Identify Generated PDFs to Delete ---
        all_pdfs = conn.execute('SELECT id, session_id, filename, created_at, persist, source_filename, notes FROM generated_pdfs').fetchall()

        with console.status("[cyan]Analyzing generated PDFs...[/cyan]") as status:
            for pdf in all_pdfs:
                created_at = parse_created_at(pdf['created_at'])
                immune = (
                    created_at is None
                    or created_at > cutoff_date
                    or pdf['persist'] == 1
                    or looks_like_neetprep(pdf['source_filename'])
                    or bool(pdf['notes'] and 'json upload' in pdf['notes'].lower())
                    or is_classified_session(conn, pdf['session_id'])
                )
                if not immune:
                    pdfs_to_delete.append((pdf, created_at))
            status.update(f"[cyan]Analyzed {len(all_pdfs)} PDFs. Found {len(pdfs_to_delete)} candidates for deletion.[/cyan]")

        # --- 3. Display Findings ---
        if not sessions_to_delete and not pdfs_to_delete:
            console.print("\n[bold green]No items found to delete. Everything is up to date.[/bold green]")
            return

        table = Table(title="Items Marked for Deletion", show_header=True, header_style="bold magenta")
        table.add_column("Type", style="dim", min_width=10)
        table.add_column("ID / Filename", style="cyan", min_width=30)
        table.add_column("Created At", style="green", min_width=20)
        table.add_column("Age (Days)", style="yellow", min_width=10)
        table.add_column("Details", min_width=30)

        for session, created_at in sessions_to_delete:
            age = (now - created_at).days
            table.add_row("Session", session['id'], session['created_at'], str(age), session['original_filename'])

        for pdf, created_at in pdfs_to_delete:
            age = (now - created_at).days
            table.add_row("Generated PDF", pdf['filename'], pdf['created_at'], str(age), f"Source: {pdf['source_filename']}")

        console.print(table)

        if DRY_RUN:
            console.print("\n[bold yellow]This was a DRY RUN. No files or database records were deleted.[/bold yellow]")
            console.print("To run the deletion, change the [code]DRY_RUN[/code] flag to [code]False[/code] in the script.")
            return

        # --- 4. Perform Deletion ---
        console.print("\n[bold red]PERFORMING DELETION...[/bold red]")

        # Delete Sessions and associated files
        for session, _created_at in sessions_to_delete:
            session_id = session['id']
            console.print(f"Deleting session [cyan]{session_id}[/cyan]...")
            images_to_delete = conn.execute('SELECT filename, processed_filename FROM images WHERE session_id = ?', (session_id,)).fetchall()
            for img in images_to_delete:
                if img['filename']:
                    f_path = os.path.join(UPLOAD_FOLDER, img['filename'])
                    try:
                        os.remove(f_path)
                        console.print(f"  - Deleted upload: [dim]{f_path}[/dim]")
                    except OSError as e:
                        # Best effort: a missing file must not stop the cleanup.
                        console.print(f"  - [red]Error deleting {f_path}: {e}[/red]")
                if img['processed_filename']:
                    f_path = os.path.join(PROCESSED_FOLDER, img['processed_filename'])
                    try:
                        os.remove(f_path)
                        console.print(f"  - Deleted processed: [dim]{f_path}[/dim]")
                    except OSError as e:
                        console.print(f"  - [red]Error deleting {f_path}: {e}[/red]")

            # Children first, then the session row itself.
            conn.execute('DELETE FROM questions WHERE session_id = ?', (session_id,))
            conn.execute('DELETE FROM images WHERE session_id = ?', (session_id,))
            conn.execute('DELETE FROM sessions WHERE id = ?', (session_id,))
            console.print(f"  - Deleted DB records for session {session_id}")

        # Delete Generated PDFs and their files
        for pdf, _created_at in pdfs_to_delete:
            pdf_id, pdf_filename = pdf['id'], pdf['filename']
            console.print(f"Deleting generated PDF [cyan]{pdf_filename}[/cyan]...")
            f_path = os.path.join(OUTPUT_FOLDER, pdf_filename)
            try:
                os.remove(f_path)
                console.print(f"  - Deleted file: [dim]{f_path}[/dim]")
            except OSError as e:
                console.print(f"  - [red]Error deleting {f_path}: {e}[/red]")

            conn.execute('DELETE FROM generated_pdfs WHERE id = ?', (pdf_id,))
            console.print(f"  - Deleted DB record for PDF {pdf_id}")

        conn.commit()
        console.print("\n[bold green]Deletion complete.[/bold green]")
    finally:
        conn.close()
0000000000000000000000000000000000000000..7c52223caf08533564cb007038442012a0a6517c --- /dev/null +++ b/cli.py @@ -0,0 +1,354 @@ +import os +import re +import sqlite3 +import sys +import uuid +from datetime import datetime, timedelta + +import click +import fitz # PyMuPDF +import requests +from rich.progress import ( + BarColumn, + Progress, + SpinnerColumn, + TextColumn, + TimeElapsedColumn, + TimeRemainingColumn, +) +from urllib.parse import urlparse +from werkzeug.utils import secure_filename + +# --- Configuration --- +from utils import get_db_connection + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +UPLOAD_FOLDER = os.path.join(SCRIPT_DIR, 'uploads') +PROCESSED_FOLDER = os.path.join(SCRIPT_DIR, 'processed') +OUTPUT_FOLDER = os.path.join(SCRIPT_DIR, 'output') + +os.makedirs(UPLOAD_FOLDER, exist_ok=True) +os.makedirs(PROCESSED_FOLDER, exist_ok=True) +os.makedirs(OUTPUT_FOLDER, exist_ok=True) + + + +# --- Core Logic Functions (mirrored from app.py) --- +def setup_database_cli(): + """Initializes the database and creates/updates tables as needed.""" + conn = get_db_connection() + cursor = conn.cursor() + click.echo("Creating/updating tables...") + + cursor.execute("CREATE TABLE IF NOT EXISTS sessions (id TEXT PRIMARY KEY, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, original_filename TEXT, persist INTEGER DEFAULT 0, subject TEXT, tags TEXT, notes TEXT);") + cursor.execute("CREATE TABLE IF NOT EXISTS images (id INTEGER PRIMARY KEY AUTOINCREMENT, session_id TEXT NOT NULL, image_index INTEGER NOT NULL, filename TEXT NOT NULL, original_name TEXT NOT NULL, processed_filename TEXT, image_type TEXT DEFAULT 'original', FOREIGN KEY (session_id) REFERENCES sessions (id));") + cursor.execute("CREATE TABLE IF NOT EXISTS questions (id INTEGER PRIMARY KEY AUTOINCREMENT, session_id TEXT NOT NULL, image_id INTEGER NOT NULL, question_number TEXT, subject TEXT, status TEXT, marked_solution TEXT, actual_solution TEXT, time_taken TEXT, tags TEXT, FOREIGN KEY 
(session_id) REFERENCES sessions (id), FOREIGN KEY (image_id) REFERENCES images (id));") + cursor.execute("CREATE TABLE IF NOT EXISTS folders (id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT NOT NULL, parent_id INTEGER, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, FOREIGN KEY (parent_id) REFERENCES folders (id) ON DELETE CASCADE);") + cursor.execute("CREATE TABLE IF NOT EXISTS generated_pdfs (id INTEGER PRIMARY KEY AUTOINCREMENT, session_id TEXT NOT NULL, filename TEXT NOT NULL, subject TEXT NOT NULL, tags TEXT, notes TEXT, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, source_filename TEXT, folder_id INTEGER, persist INTEGER DEFAULT 0, FOREIGN KEY (session_id) REFERENCES sessions (id), FOREIGN KEY (folder_id) REFERENCES folders (id) ON DELETE SET NULL);") + cursor.execute("CREATE TABLE IF NOT EXISTS neetprep_questions (id TEXT PRIMARY KEY, question_text TEXT, options TEXT, correct_answer_index INTEGER, level TEXT, topic TEXT, subject TEXT, last_fetched_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP);") + cursor.execute("CREATE TABLE IF NOT EXISTS neetprep_processed_attempts (attempt_id TEXT PRIMARY KEY, processed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP);") + + # Add columns to sessions table if they don't exist + # Each probe SELECTs one column; a sqlite3.OperationalError from that SELECT is taken to mean the column is missing and triggers the ALTER TABLE. + # The CREATE TABLE statements above already declare these columns, so the ALTERs only run against a table that already existed without them. + try: + cursor.execute("SELECT subject FROM sessions LIMIT 1") + except sqlite3.OperationalError: + cursor.execute("ALTER TABLE sessions ADD COLUMN subject TEXT") + try: + cursor.execute("SELECT tags FROM sessions LIMIT 1") + except sqlite3.OperationalError: + cursor.execute("ALTER TABLE sessions ADD COLUMN tags TEXT") + try: + cursor.execute("SELECT notes FROM sessions LIMIT 1") + except sqlite3.OperationalError: + cursor.execute("ALTER TABLE sessions ADD COLUMN notes TEXT") + + # Same probe-then-ALTER pattern for the questions table. + try: + cursor.execute("SELECT tags FROM questions LIMIT 1") + except sqlite3.OperationalError: + cursor.execute("ALTER TABLE questions ADD COLUMN tags TEXT") + + click.echo("Tables created successfully.") + conn.commit() + conn.close() + +def cleanup_old_data_cli(): + """Removes 
sessions, files, and PDFs older than 1 day, unless persisted.""" + conn = get_db_connection() + # NOTE(review): created_at defaults to SQLite CURRENT_TIMESTAMP (UTC) while this cutoff uses local time -- confirm the intended window. + cutoff = datetime.now() - timedelta(days=1) + click.echo(f"Starting cleanup for items older than {cutoff.strftime('%Y-%m-%d %H:%M:%S')}:") + + old_sessions = conn.execute('SELECT id FROM sessions WHERE created_at < ? AND persist = 0', (cutoff,)).fetchall() + click.echo(f"Found {len(old_sessions)} old, non-persisted sessions to delete.") + for session in old_sessions: + session_id = session['id'] + images_to_delete = conn.execute('SELECT filename, processed_filename FROM images WHERE session_id = ?', (session_id,)).fetchall() + for img in images_to_delete: + if img['filename'] and os.path.exists(os.path.join(UPLOAD_FOLDER, img['filename'])): os.remove(os.path.join(UPLOAD_FOLDER, img['filename'])) + if img['processed_filename'] and os.path.exists(os.path.join(PROCESSED_FOLDER, img['processed_filename'])): os.remove(os.path.join(PROCESSED_FOLDER, img['processed_filename'])) + conn.execute('DELETE FROM questions WHERE session_id = ?', (session_id,)) + conn.execute('DELETE FROM images WHERE session_id = ?', (session_id,)) + conn.execute('DELETE FROM sessions WHERE id = ?', (session_id,)) + + old_pdfs = conn.execute('SELECT id, filename FROM generated_pdfs WHERE created_at < ? AND persist = 0', (cutoff,)).fetchall() + click.echo(f"Found {len(old_pdfs)} old, non-persisted generated PDFs to delete.") + for pdf in old_pdfs: + if os.path.exists(os.path.join(OUTPUT_FOLDER, pdf['filename'])): os.remove(os.path.join(OUTPUT_FOLDER, pdf['filename'])) + conn.execute('DELETE FROM generated_pdfs WHERE id = ?', (pdf['id'],)) + + conn.commit() + conn.close() + +def _get_local_pdf_path(path_or_url): + """ + Takes a path or URL. If it's a URL, downloads it to the UPLOAD_FOLDER. 
+ Returns (local_path, original_filename, is_temp_file) + On any failure (download error, unrecognized URL, missing local file) prints an error and returns (None, None, False). + """ + is_url = path_or_url.lower().startswith(('http://', 'https://')) + if is_url: + click.echo(f"Downloading from URL: {path_or_url}") + try: + if "drive.google.com" in path_or_url: + # The file id is taken as the second-to-last path segment of the URL. + file_id = path_or_url.split('/')[-2] + download_url = f'https://drive.google.com/uc?export=download&id={file_id}' + response = requests.get(download_url, stream=True) + # Fail on HTTP errors here as the direct-link branch does, so an error page is never saved as a PDF. + response.raise_for_status() + content_disposition = response.headers.get('content-disposition') + if content_disposition: + filenames = re.findall('filename="(.+)"', content_disposition) + original_name = secure_filename(filenames[0]) if filenames else f"{str(uuid.uuid4())}.pdf" + else: + original_name = f"{str(uuid.uuid4())}.pdf" + elif path_or_url.lower().endswith('.pdf'): + response = requests.get(path_or_url, stream=True) + response.raise_for_status() + original_name = secure_filename(path_or_url.split('/')[-1]) or f"{str(uuid.uuid4())}.pdf" + else: + raise ValueError("URL is not a recognized Google Drive or direct .pdf link.") + + local_path = os.path.join(UPLOAD_FOLDER, f"temp_{original_name}") + with open(local_path, 'wb') as f: + for chunk in response.iter_content(chunk_size=8192): + f.write(chunk) + + return local_path, original_name, True + except Exception as e: + click.secho(f"Error downloading file: {e}", fg="red", err=True) + return None, None, False + else: + if not os.path.exists(path_or_url): + click.secho(f"Error: File not found at {path_or_url}", fg="red", err=True) + return None, None, False + return path_or_url, secure_filename(os.path.basename(path_or_url)), False + +# --- CLI Group --- +@click.group() +def cli(): + """A CLI for managing the Report Generator application.""" + pass + +# --- CLI Commands --- +@cli.command() +def db_init(): + """Initializes or updates the database schema.""" + click.secho("Initializing database schema...", fg="yellow") + setup_database_cli() + click.secho("Database schema is up to date.", fg="green") + +@cli.command() +def db_cleanup(): 
+ """Cleans up old, non-persisted data.""" + click.secho("Starting cleanup of old data...", fg="yellow") + cleanup_old_data_cli() + click.secho("Cleanup finished.", fg="green") + +@cli.command('add-question') +@click.option('--session-id', required=True, type=click.STRING) +@click.option('--image-path', required=True, type=click.Path(exists=True)) +@click.option('--q-num', type=click.STRING) +@click.option('--status', type=click.Choice(['Correct', 'Wrong', 'Unattempted'])) +@click.option('--marked-ans', type=click.STRING) +@click.option('--correct-ans', type=click.STRING) +@click.option('--subject', type=click.STRING) +@click.option('--time', type=click.STRING) +def add_question(session_id, image_path, q_num, status, marked_ans, correct_ans, subject, time): + """Adds a single question with metadata to the database. + + Copies the image into PROCESSED_FOLDER, inserts an images row with the next image_index for the session, then inserts the questions row. + Any failure is reported and converted to click.Abort. + """ + setup_database_cli() # Ensure database tables exist + # Bind conn before the try so the finally block can test it even when get_db_connection() raises. + conn = None + try: + conn = get_db_connection() + cursor = conn.cursor() + + # 1. Copy image to processed folder + original_filename = secure_filename(os.path.basename(image_path)) + processed_filename = f"processed_{session_id}_{str(uuid.uuid4())[:8]}_{original_filename}" + processed_path = os.path.join(PROCESSED_FOLDER, processed_filename) + import shutil + shutil.copy(image_path, processed_path) + + # 2. Create a new image record + # Find the next available image_index for the session + cursor.execute("SELECT MAX(image_index) FROM images WHERE session_id = ?", (session_id,)) + max_index = cursor.fetchone()[0] + # MAX() yields None only when the session has no images; test for None explicitly because an existing index of 0 is falsy. + new_index = 0 if max_index is None else max_index + 1 + + cursor.execute( + 'INSERT INTO images (session_id, image_index, filename, original_name, processed_filename, image_type) VALUES (?, ?, ?, ?, ?, ?)', + (session_id, new_index, original_filename, original_filename, processed_filename, 'cropped') + ) + image_id = cursor.lastrowid + + # 3. 
Create a new question record + cursor.execute( + 'INSERT INTO questions (session_id, image_id, question_number, status, marked_solution, actual_solution, subject, time_taken) VALUES (?, ?, ?, ?, ?, ?, ?, ?)', + (session_id, image_id, q_num, status, marked_ans, correct_ans, subject, time) + ) + + conn.commit() + click.secho(f"Successfully added question {q_num} (Image ID: {image_id}) to session {session_id}.", fg="green") + + except Exception as e: + click.secho(f"Error adding question: {e}", fg="red", err=True) + raise click.Abort() + finally: + if conn: + conn.close() + +@cli.command('upload') +@click.argument('pdf_paths', type=click.STRING) +@click.option('--simple-progress', is_flag=True, help='Print simple percentage progress to stdout.') +@click.option('--final', is_flag=True, help='Mark the PDF as a final version and add to generated_pdfs table.') +@click.option('--subject', type=click.STRING, help='Subject for the final PDF.') +@click.option('--tags', type=click.STRING, help='Tags for the final PDF (comma-separated).') +@click.option('--notes', type=click.STRING, help='Notes for the final PDF.') +@click.option('--log', is_flag=True, help='Log all output to cli.log.') +def upload(pdf_paths, simple_progress, final, subject, tags, notes, log): + """ + A CLI tool to upload a large PDF directly to the application's database. + PDF_PATHS: A comma-separated list of full paths to the PDF files you wish to upload or Google Drive URLs. 
+ """ + setup_database_cli() # Ensure database tables exist + if log: + try: + log_f = open('cli.log', 'a') + sys.stdout = log_f + sys.stderr = log_f + except Exception as e: + click.secho(f"Error opening log file: {e}", fg="red", err=True) + raise click.Abort() + + click.echo(f"--- Log entry: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ---") + click.echo(f"Arguments: pdf_paths={pdf_paths}, simple_progress={simple_progress}, final={final}, subject={subject}, tags={tags}, notes={notes}, log={log}") + click.echo("---" * 20) + + files_to_process = [p.strip() for p in pdf_paths.split(',')] + + for pdf_path_or_url in files_to_process: + click.secho(f"--- Processing: {click.style(pdf_path_or_url, bold=True)} ---", fg="yellow") + + local_pdf_path, original_filename, is_temp = _get_local_pdf_path(pdf_path_or_url) + + if not local_pdf_path: + continue + + try: + if final: + if not subject: + click.secho("Error: --subject is required when using --final.", fg="red", err=True) + raise click.Abort() + + session_id = str(uuid.uuid4()) + conn = get_db_connection() + cursor = conn.cursor() + cursor.execute('INSERT INTO sessions (id, original_filename) VALUES (?, ?)', + (session_id, original_filename)) + + output_filename = original_filename + output_path = os.path.join(OUTPUT_FOLDER, output_filename) + + if os.path.exists(output_path): + timestamp = datetime.now().strftime('%Y%m%d%H%M%S') + output_filename = f"{timestamp}_{original_filename}" + output_path = os.path.join(OUTPUT_FOLDER, output_filename) + click.secho(f"Warning: File '{original_filename}' already exists. 
Saving as '{output_filename}'.", fg="yellow") + + import shutil + shutil.copy(local_pdf_path, output_path) + + cursor.execute( + 'INSERT INTO generated_pdfs (session_id, filename, subject, tags, notes, source_filename) VALUES (?, ?, ?, ?, ?, ?)', + (session_id, output_filename, subject, tags, notes, original_filename) + ) + conn.commit() + conn.close() + click.secho(f"Successfully added final PDF '{original_filename}' to the database.", fg="green") + + else: # Standard page-extraction mode + click.echo(f"Processing PDF: {click.style(original_filename, bold=True)}") + session_id = str(uuid.uuid4()) + doc = fitz.open(local_pdf_path) + num_pages = len(doc) + if num_pages == 0: + click.secho("Warning: This PDF has 0 pages. Nothing to process.", fg="yellow") + # Release the document before skipping; nothing else closes it on this path. + doc.close() + continue + + click.echo(f"PDF contains {num_pages} pages to process.") + conn = get_db_connection() + cursor = conn.cursor() + cursor.execute('INSERT INTO sessions (id, original_filename) VALUES (?, ?)', + (session_id, original_filename)) + click.echo(f"Created session: {click.style(session_id, fg='cyan')}") + + images_to_insert = [] + + if simple_progress: + for i, page in enumerate(doc): + pix = page.get_pixmap(dpi=150) + page_filename = f"{session_id}_page_{i}.png" + page_path = os.path.join(UPLOAD_FOLDER, page_filename) + pix.save(page_path) + images_to_insert.append( + (session_id, i, page_filename, f"Page {i + 1}", 'original') + ) + percentage = int(((i + 1) / num_pages) * 100) + sys.stdout.write(f"{percentage}\n") + sys.stdout.flush() + else: + progress = Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + BarColumn(bar_width=None), + TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), + TextColumn("• Page {task.completed}/{task.total}"), + TextColumn("• Elapsed:"), TimeElapsedColumn(), + TextColumn("• Remaining:"), TimeRemainingColumn(), + ) + with progress: + task = progress.add_task("[green]Extracting pages...", total=num_pages) + for i, page in 
enumerate(doc): + pix = page.get_pixmap(dpi=150) + page_filename = f"{session_id}_page_{i}.png" + page_path = os.path.join(UPLOAD_FOLDER, page_filename) + pix.save(page_path) + images_to_insert.append( + (session_id, i, page_filename, f"Page {i + 1}", 'original') + ) + progress.update(task, advance=1) + + click.echo("\nInserting image records into the database...") + cursor.executemany( + 'INSERT INTO images (session_id, image_index, filename, original_name, image_type) VALUES (?, ?, ?, ?, ?)', + images_to_insert + ) + conn.commit() + click.secho(f"Successfully committed {len(images_to_insert)} records to the database.", fg="green") + doc.close() + # Close the connection here as the --final branch does after its commit. + conn.close() + + except click.Abort: + # click.Abort subclasses RuntimeError; re-raise it so the generic handler below cannot swallow the abort. + raise + + except Exception as e: + click.secho(f"An unexpected error occurred while processing {original_filename}: {e}", fg="red", err=True) + + finally: + if is_temp and os.path.exists(local_pdf_path): + os.remove(local_pdf_path) + + click.secho(f"\n✅ All done! Upload complete for '{original_filename}'.", fg="green", bold=True) + +if __name__ == '__main__': + cli() diff --git a/clients.json b/clients.json new file mode 100644 index 0000000000000000000000000000000000000000..3bbb9cfca07bf81fc2b826b510813bb9417ffa97 --- /dev/null +++ b/clients.json @@ -0,0 +1,49 @@ +{ + "wooderin": { + "name": "charges-ready", + "client_id": "wooderin", + "sessions": { + "3_11_2025__10_18_18_pm.wooderin": { + "tasks": { + "338db4f6-bf6a-4171-9856-da4f64c00019": { + "task_id": "338db4f6-bf6a-4171-9856-da4f64c00019", + "progress": { + "audio": { + "type": "audio", + "total": 2132, + "current": 2132, + "percentage": 100.0, + "segment_num": 2101, + "success": true, + "failed_segments": [], + "timestamp": "2025-11-03 16:51:00" + }, + "video": { + "type": "video", + "total": 2131, + "current": 2131, + "percentage": 100.0, + "segment_num": 2125, + "success": true, + "failed_segments": [], + "timestamp": "2025-11-03 16:51:49" + } + }, + "status": "completed", + "name": "Oscillations_01__Feel_of_SHM__Rescheduled_at_0830_AM__NO_DPP_", + "out_dir": 
"/data/data/com.termux/files/home/pwdlv3/webdl", + "id": "68b7ea1c18f010ab787a2fc9", + "batch_name": "678b4cf5a3a368218a2b16e7", + "topic_name": null, + "lecture_url": null, + "client_id": "wooderin", + "session_id": "3_11_2025__10_18_18_pm.wooderin", + "url": "/get-file/338db4f6-bf6a-4171-9856-da4f64c00019/Oscillations_01__Feel_of_SHM__Rescheduled_at_0830_AM__NO_DPP_" + } + }, + "name": "invitations-cancel", + "timestamp": "2025-11-03 22:18:19" + } + } + } +} \ No newline at end of file diff --git a/color_rm.md b/color_rm.md new file mode 100644 index 0000000000000000000000000000000000000000..20acbc075413e6013cfb9ebed9010bb96e7076ad --- /dev/null +++ b/color_rm.md @@ -0,0 +1,53 @@ +# Color Removal Tool (`color_rm`) + +This document provides an overview of the Color Removal Tool, explaining its functionality, interface, and workflow. + +## Overview + +The Color Removal Tool is a feature designed to process images by selectively removing colors. The core idea is to keep specific colors in an image while removing all others. This is particularly useful for cleaning up scanned documents, highlighting specific content, or creating stylized images. + +The tool operates on the principle of "colors to keep". You select the colors you want to preserve, and the tool removes everything else. + +## Workflow + +The process of using the Color Removal Tool is as follows: + +1. **Upload:** You start by uploading a PDF or a set of images. If a PDF is uploaded, it is automatically split into individual pages, which are treated as images. + +2. **Interface:** You are then taken to the color removal interface. Here, you can navigate through the pages of your document one by one. + +3. **Color Selection:** + * **From Image:** Click directly on the image to select a color you want to keep. The selected color will appear as a swatch in the "Selected Colors" list. + * **Manual Picker:** Use the color picker to choose a color manually and add it to the list. + +4. 
**Adjust Settings:** + * **Match Strictness:** This slider controls how closely a color in the image must match a selected color to be kept. A stricter setting means only very similar colors will be kept. A looser setting will keep a wider range of shades. + * **Background Fill:** You can choose what the removed parts of the image are filled with: black, white, or transparent. + * **Region Selection:** You can choose to apply the color removal effect only to a specific rectangular region of the image. + +5. **Preview:** Click the "Preview" button to see the result of your selections and settings applied to the current image. + +6. **Save & Continue:** + * **Save:** Saves the processed image for the current page and automatically moves to the next page. + * **Apply to All Pages:** This will process all pages in the session using the current settings (colors, strictness, etc.). This is a batch operation and may take some time. + +7. **Generate PDF:** Once you have processed the desired pages, you can generate a PDF of the results. You can choose to include all pages or a specific range of pages. + +## Processing Logic + +The color matching is not based on simple RGB values. Instead, it uses the **CIELAB color space**. This color space is designed to approximate human vision, so the "distance" between two colors in this space is more perceptually uniform. + +When you select a color, the tool calculates its LAB value. Then, for each pixel in the image, it calculates the difference (Delta E) between the pixel's color and the selected colors. If the difference is below the "Match Strictness" threshold for any of the selected colors, the pixel is kept. Otherwise, it is removed. + +## Keyboard Shortcuts + +The following keyboard shortcuts are available in the color removal interface for faster workflow: + +* `ArrowRight`: Go to the next page. +* `ArrowLeft`: Go to the previous page. +* `Enter`: Save the current page and go to the next one. 
+* `p` or `P`: Toggle the preview. +* `r` or `R`: Reset the current image to its original state (a confirmation will be asked). +* `Delete` or `Backspace`: Remove the last selected color. + +**Note:** Shortcuts are disabled when you are typing in an input field (e.g., the page number input). diff --git a/config.py b/config.py new file mode 100644 index 0000000000000000000000000000000000000000..5831e155ffbfb3cb941350ade9ddb48e4cab5d07 --- /dev/null +++ b/config.py @@ -0,0 +1,18 @@ +import os + +class Config: + # 16 * 1024 * 1024 * 4096 bytes = 64 GiB. + MAX_CONTENT_LENGTH = 16 * 1024 * 1024 * 4096 + UPLOAD_FOLDER = 'uploads' + PROCESSED_FOLDER = 'processed' + OUTPUT_FOLDER = 'output' + DATABASE = 'database.db' + # Read once, when the class body is evaluated; None if the variable is unset. + NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY") + NIM_API_URL = "https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr-v1" + # The f-string is built even when the key is None, giving "Bearer None"; check NVIDIA_NIM_AVAILABLE before using these headers. + NIM_HEADERS = { + "Authorization": f"Bearer {NVIDIA_API_KEY}", + "Accept": "application/json", + "Content-Type": "application/json", + } + MODEL_MAX_WIDTH = 500 + MODEL_MAX_HEIGHT = 500 + # True only when NVIDIA_API_KEY is a non-empty string. + NVIDIA_NIM_AVAILABLE = bool(NVIDIA_API_KEY) diff --git a/dash b/dash new file mode 100644 index 0000000000000000000000000000000000000000..f447508997ae5f1bf30a0684791aa12a0d6c6711 --- /dev/null +++ b/dash @@ -0,0 +1,724 @@ +{% extends "base.html" %} + +{% block title %}Subjective Questions Manager{% endblock %} + +{% block head %} + + + + + + +{% endblock %} + +{% block content %} +
+ + +
+
+

Subjective Questions

+ +
+
+ + + + + + Print PDF + + + Generator + +
+
+ + + {% with messages = get_flashed_messages(with_categories=true) %} + {% if messages %} + {% for category, message in messages %} + + {% endfor %} + {% endif %} + {% endwith %} + + + {% if subfolders %} +
+ {% for folder in subfolders %} + + {% endfor %} +
+ {% endif %} + + + {% if grouped_questions %} + {% for topic, questions_list in grouped_questions.items() %} +
+
+
{{ topic }}
+
+ + + + {{ questions_list|length }} Questions +
+ +
+
+
+
+
+ + + + + + + + + + + {% for q in questions_list %} + + + + + + + {% endfor %} + +
#QuestionActions
{{ q.question_number_within_topic }}
{{ q.question_html | safe }}
+ + +
+
+
+
+ {% endfor %} + {% elif not subfolders %} +
+

No Content

+

This folder is empty. Generate new questions or create subfolders.

+ Generate Questions +
+ {% endif %} +
+ + + + + + + +{% include 'reorder_modal.html' %} + + + + + + + + +{% endblock %} diff --git a/dashboard.py b/dashboard.py new file mode 100644 index 0000000000000000000000000000000000000000..eeef5f582f7d72605b8eacc59b24179bb3b743fb --- /dev/null +++ b/dashboard.py @@ -0,0 +1,253 @@ + +from flask import Blueprint, render_template, request, redirect, url_for, flash, jsonify +from flask_login import login_required, current_user +from database import get_db_connection +import os +from flask import current_app + +dashboard_bp = Blueprint('dashboard', __name__) + +def get_session_size(session_id, user_id): + """Calculate the total size of files associated with a session.""" + import os + from flask import current_app + + # Import logging + try: + from rich.console import Console + from rich.table import Table + console = Console() + rich_available = True + except ImportError: + # Rich not available, just use basic logging + console = None + rich_available = False + + current_app.logger.info(f"Calculating size for session_id: {session_id}") + + total_size = 0 + breakdown = [] + + conn = get_db_connection() + + # Get all images associated with the session + images = conn.execute(""" + SELECT filename, processed_filename, image_type + FROM images + WHERE session_id = ? 
+ """, (session_id,)).fetchall() + + # Add sizes of original and processed images + for image in images: + # Add original file size (in upload folder) + if image['filename']: + file_path = os.path.join(current_app.config['UPLOAD_FOLDER'], image['filename']) + if os.path.exists(file_path): + size = os.path.getsize(file_path) + total_size += size + current_app.logger.info(f" Original image {image['filename']}: {size} bytes") + breakdown.append(("Original Image", image['filename'], size)) + else: + current_app.logger.info(f" Original image file not found: {file_path}") + + # Add processed/cropped image size (in processed folder) + if image['processed_filename']: + file_path = os.path.join(current_app.config['PROCESSED_FOLDER'], image['processed_filename']) + if os.path.exists(file_path): + size = os.path.getsize(file_path) + total_size += size + current_app.logger.info(f" Processed image {image['processed_filename']}: {size} bytes") + breakdown.append(("Processed Image", image['processed_filename'], size)) + else: + current_app.logger.info(f" Processed image file not found: {file_path}") + + # Add size of original PDF file if it exists + session_info = conn.execute("SELECT original_filename FROM sessions WHERE id = ?", (session_id,)).fetchone() + if session_info and session_info['original_filename']: + # Try to find the original PDF in the upload folder with the session ID prefix + pdf_filename = f"{session_id}_{session_info['original_filename']}" + pdf_path = os.path.join(current_app.config['UPLOAD_FOLDER'], pdf_filename) + if os.path.exists(pdf_path): + size = os.path.getsize(pdf_path) + total_size += size + current_app.logger.info(f" Original PDF {pdf_filename}: {size} bytes") + breakdown.append(("Original PDF", pdf_filename, size)) + else: + current_app.logger.info(f" Original PDF file not found: {pdf_path}") + + # Add size of any generated PDFs for this session + generated_pdfs = conn.execute(""" + SELECT filename + FROM generated_pdfs + WHERE session_id = ? 
def format_file_size(size_bytes):
    """Convert a byte count to a short human-readable string.

    The value is scaled by powers of 1024 and rounded to two decimals,
    e.g. ``1536 -> "1.5 KB"``.

    Args:
        size_bytes: Non-negative number of bytes (int or float).

    Returns:
        A string such as ``"0 B"``, ``"512.0 B"`` or ``"1.0 GB"``.

    Raises:
        ValueError: If ``size_bytes`` is negative.
    """
    if size_bytes == 0:
        return "0 B"
    if size_bytes < 0:
        raise ValueError(f"size_bytes must be non-negative, got {size_bytes!r}")

    size_names = ["B", "KB", "MB", "GB", "TB", "PB"]

    # Scale by repeated division instead of math.log: a float logarithm can
    # land a hair below an exact power of 1024 and select the wrong unit.
    # The index is also clamped to the largest known unit, so very large
    # sizes are reported in that unit instead of raising IndexError.
    value = float(size_bytes)
    unit_index = 0
    last_index = len(size_names) - 1
    while value >= 1024 and unit_index < last_index:
        value /= 1024
        unit_index += 1

    return f"{round(value, 2)} {size_names[unit_index]}"
@dashboard_bp.route('/sessions/batch_delete', methods=['POST'])
@login_required
def batch_delete_sessions():
    """Delete several of the current user's sessions and their image files.

    Expects a JSON body shaped like ``{"ids": ["session-id", ...]}``.
    Sessions that do not exist or belong to another user are skipped and
    logged; they do not abort the rest of the batch.

    Returns:
        ``{"success": true}`` when the batch was processed, a 400 response
        when no usable list of ids was supplied, or a 500 response carrying
        the error message when the deletion failed.
    """
    # silent=True gives None for a missing or malformed JSON body instead of
    # raising, so a bad payload gets the same 400 as an empty id list.
    data = request.get_json(silent=True)
    session_ids = data.get('ids') if isinstance(data, dict) else None

    # A bare string would otherwise be iterated character by character.
    if not session_ids or not isinstance(session_ids, list):
        return jsonify({'error': 'No session IDs provided'}), 400

    conn = None
    try:
        conn = get_db_connection()
        upload_folder = current_app.config['UPLOAD_FOLDER']
        processed_folder = current_app.config['PROCESSED_FOLDER']

        for session_id in session_ids:
            # Security Check: Ensure the session belongs to the current user
            session_owner = conn.execute(
                'SELECT user_id FROM sessions WHERE id = ?', (session_id,)
            ).fetchone()
            if not session_owner or session_owner['user_id'] != current_user.id:
                # Skip (and log) instead of deleting someone else's data.
                current_app.logger.warning(f"User {current_user.id} attempted to delete unauthorized session {session_id}.")
                continue

            # Delete associated files; a file that is already gone is fine.
            images_to_delete = conn.execute(
                'SELECT filename, processed_filename FROM images WHERE session_id = ?',
                (session_id,),
            ).fetchall()
            for img in images_to_delete:
                if img['filename']:
                    try:
                        os.remove(os.path.join(upload_folder, img['filename']))
                    except OSError:
                        pass
                if img['processed_filename']:
                    try:
                        os.remove(os.path.join(processed_folder, img['processed_filename']))
                    except OSError:
                        pass

            # Delete from database (children before the parent session row)
            conn.execute('DELETE FROM questions WHERE session_id = ?', (session_id,))
            conn.execute('DELETE FROM images WHERE session_id = ?', (session_id,))
            conn.execute('DELETE FROM sessions WHERE id = ?', (session_id,))

        conn.commit()
        return jsonify({'success': True})
    except Exception as e:
        current_app.logger.error(f"Error in batch delete: {str(e)}")
        return jsonify({'error': str(e)}), 500
    finally:
        # Always release the connection; closing without a commit discards
        # any partially applied deletes from a failed batch.
        if conn is not None:
            conn.close()
AND image_type = 'original' + """, (session_id,)).fetchall() + + # Truncate original images to reduce space + truncated_count = 0 + for image in images: + if image['filename']: + file_path = os.path.join(current_app.config['UPLOAD_FOLDER'], image['filename']) + if os.path.exists(file_path): + try: + # Truncate the file to 0 bytes + with open(file_path, 'w') as f: + f.truncate(0) + truncated_count += 1 + except OSError as e: + current_app.logger.error(f"Error truncating file {file_path}: {str(e)}") + + conn.close() + + return jsonify({ + 'success': True, + 'truncated_count': truncated_count, + 'message': f'Successfully reduced space by truncating {truncated_count} original page images' + }) + except Exception as e: + current_app.logger.error(f"Error in reduce space: {str(e)}") + return jsonify({'error': str(e)}), 500 diff --git a/database.py b/database.py new file mode 100644 index 0000000000000000000000000000000000000000..7b69200f78bd4aea3cd0dc88cd03bbea6fdb602f --- /dev/null +++ b/database.py @@ -0,0 +1,466 @@ + +import os +import sqlite3 +from datetime import datetime, timedelta +from flask import current_app +from utils import get_db_connection + +def setup_database(): + """Initializes the database and creates/updates tables as needed.""" + conn = get_db_connection() + cursor = conn.cursor() + + cursor.execute(""" + CREATE TABLE IF NOT EXISTS users ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + username TEXT NOT NULL UNIQUE, + email TEXT NOT NULL UNIQUE, + password_hash TEXT NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ); + """) + + # Create sessions table + cursor.execute(""" + CREATE TABLE IF NOT EXISTS sessions ( + id TEXT PRIMARY KEY, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + original_filename TEXT, + persist INTEGER DEFAULT 0, + name TEXT, + user_id INTEGER, + session_type TEXT DEFAULT 'standard' + ); + """) + + # Create images table + cursor.execute(""" + CREATE TABLE IF NOT EXISTS images ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + 
session_id TEXT NOT NULL, + image_index INTEGER NOT NULL, + filename TEXT NOT NULL, + original_name TEXT NOT NULL, + processed_filename TEXT, + image_type TEXT DEFAULT 'original', + box_id TEXT, + FOREIGN KEY (session_id) REFERENCES sessions (id) + ); + """) + + # Create questions table + cursor.execute(""" + CREATE TABLE IF NOT EXISTS questions ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + session_id TEXT NOT NULL, + image_id INTEGER NOT NULL, + question_number TEXT, + subject TEXT, + status TEXT, + marked_solution TEXT, + actual_solution TEXT, + time_taken TEXT, + tags TEXT, + FOREIGN KEY (session_id) REFERENCES sessions (id), + FOREIGN KEY (image_id) REFERENCES images (id) + ); + """) + + # Create folders table + cursor.execute(""" + CREATE TABLE IF NOT EXISTS folders ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + parent_id INTEGER, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (parent_id) REFERENCES folders (id) ON DELETE CASCADE + ); + """) + + # Create generated_pdfs table + cursor.execute(""" + CREATE TABLE IF NOT EXISTS generated_pdfs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + session_id TEXT NOT NULL, + filename TEXT NOT NULL, + subject TEXT NOT NULL, + tags TEXT, + notes TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + source_filename TEXT, + folder_id INTEGER, + persist INTEGER DEFAULT 0, + FOREIGN KEY (session_id) REFERENCES sessions (id), + FOREIGN KEY (folder_id) REFERENCES folders (id) ON DELETE SET NULL + ); + """) + + # Create neetprep_questions table + cursor.execute(""" + CREATE TABLE IF NOT EXISTS neetprep_questions ( + id TEXT PRIMARY KEY, + question_text TEXT, + options TEXT, + correct_answer_index INTEGER, + level TEXT, + topic TEXT, + subject TEXT, + last_fetched_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ); + """) + + # Create neetprep_processed_attempts table + cursor.execute(""" + CREATE TABLE IF NOT EXISTS neetprep_processed_attempts ( + attempt_id TEXT PRIMARY KEY, + processed_at TIMESTAMP 
DEFAULT CURRENT_TIMESTAMP + ); + """) + + # Create subjective_folders table + cursor.execute(""" + CREATE TABLE IF NOT EXISTS subjective_folders ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + parent_id INTEGER, + user_id INTEGER, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (parent_id) REFERENCES subjective_folders (id) ON DELETE CASCADE, + FOREIGN KEY (user_id) REFERENCES users (id) + ); + """) + + # Create subjective_questions table + cursor.execute(""" + CREATE TABLE IF NOT EXISTS subjective_questions ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL, + question_topic TEXT NOT NULL, + question_html TEXT NOT NULL, + question_number_within_topic TEXT, + folder_id INTEGER, + topic_order INTEGER DEFAULT 0, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (user_id) REFERENCES users (id), + FOREIGN KEY (folder_id) REFERENCES subjective_folders (id) ON DELETE SET NULL + ); + """) + + # Create drive_sources table + cursor.execute(""" + CREATE TABLE IF NOT EXISTS drive_sources ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + url TEXT NOT NULL, + local_path TEXT, + last_synced TIMESTAMP, + user_id INTEGER, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (user_id) REFERENCES users (id) + ); + """) + + # Create pdf_access_history table + cursor.execute(""" + CREATE TABLE IF NOT EXISTS pdf_access_history ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL, + file_id TEXT NOT NULL, + filename TEXT NOT NULL, + source_type TEXT DEFAULT 'drive_api', + opened_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (user_id) REFERENCES users (id) + ); + """) + + # Create qtab_folders table for question table organization + cursor.execute(""" + CREATE TABLE IF NOT EXISTS qtab_folders ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + parent_id INTEGER, + user_id INTEGER, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (parent_id) 
REFERENCES qtab_folders (id) ON DELETE CASCADE, + FOREIGN KEY (user_id) REFERENCES users (id) + ); + """) + + # Create qtab_images table for question-answer extraction + cursor.execute(""" + CREATE TABLE IF NOT EXISTS qtab_images ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL, + folder_id INTEGER, + filename TEXT NOT NULL, + original_name TEXT NOT NULL, + result_json TEXT, + status TEXT DEFAULT 'pending', + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (user_id) REFERENCES users (id), + FOREIGN KEY (folder_id) REFERENCES qtab_folders (id) ON DELETE SET NULL + ); + """) + + # --- Migrations --- + try: + cursor.execute("SELECT topic_order FROM subjective_questions LIMIT 1") + except sqlite3.OperationalError: + cursor.execute("ALTER TABLE subjective_questions ADD COLUMN topic_order INTEGER DEFAULT 0") + + try: + cursor.execute("SELECT folder_id FROM subjective_questions LIMIT 1") + except sqlite3.OperationalError: + cursor.execute("ALTER TABLE subjective_questions ADD COLUMN folder_id INTEGER REFERENCES subjective_folders(id) ON DELETE SET NULL") + + try: + cursor.execute("SELECT tags FROM questions LIMIT 1") + except sqlite3.OperationalError: + cursor.execute("ALTER TABLE questions ADD COLUMN tags TEXT") + + try: + cursor.execute("SELECT tags FROM questions LIMIT 1") + except sqlite3.OperationalError: + cursor.execute("ALTER TABLE questions ADD COLUMN tags TEXT") + + try: + cursor.execute("SELECT image_type FROM images LIMIT 1") + except sqlite3.OperationalError: + cursor.execute("ALTER TABLE images ADD COLUMN image_type TEXT DEFAULT 'original'") + + try: + cursor.execute("SELECT original_filename FROM sessions LIMIT 1") + except sqlite3.OperationalError: + cursor.execute("ALTER TABLE sessions ADD COLUMN original_filename TEXT") + + try: + cursor.execute("SELECT persist FROM sessions LIMIT 1") + except sqlite3.OperationalError: + cursor.execute("ALTER TABLE sessions ADD COLUMN persist INTEGER DEFAULT 0") + + try: + 
cursor.execute("SELECT name FROM sessions LIMIT 1") + except sqlite3.OperationalError: + cursor.execute("ALTER TABLE sessions ADD COLUMN name TEXT") + + try: + cursor.execute("SELECT persist FROM generated_pdfs LIMIT 1") + except sqlite3.OperationalError: + cursor.execute("ALTER TABLE generated_pdfs ADD COLUMN persist INTEGER DEFAULT 0") + + try: + cursor.execute("SELECT folder_id FROM generated_pdfs LIMIT 1") + except sqlite3.OperationalError: + cursor.execute("ALTER TABLE generated_pdfs ADD COLUMN folder_id INTEGER REFERENCES folders(id) ON DELETE SET NULL") + + try: + cursor.execute("SELECT question_text FROM questions LIMIT 1") + except sqlite3.OperationalError: + cursor.execute("ALTER TABLE questions ADD COLUMN question_text TEXT") + + try: + cursor.execute("SELECT chapter FROM questions LIMIT 1") + except sqlite3.OperationalError: + cursor.execute("ALTER TABLE questions ADD COLUMN chapter TEXT") + + # --- Multi-user Migrations --- + try: + cursor.execute("SELECT user_id FROM sessions LIMIT 1") + except sqlite3.OperationalError: + cursor.execute("ALTER TABLE sessions ADD COLUMN user_id INTEGER REFERENCES users(id)") + + try: + cursor.execute("SELECT user_id FROM generated_pdfs LIMIT 1") + except sqlite3.OperationalError: + cursor.execute("ALTER TABLE generated_pdfs ADD COLUMN user_id INTEGER REFERENCES users(id)") + + try: + cursor.execute("SELECT user_id FROM folders LIMIT 1") + except sqlite3.OperationalError: + cursor.execute("ALTER TABLE folders ADD COLUMN user_id INTEGER REFERENCES users(id)") + + try: + cursor.execute("SELECT neetprep_enabled FROM users LIMIT 1") + except sqlite3.OperationalError: + cursor.execute("ALTER TABLE users ADD COLUMN neetprep_enabled INTEGER DEFAULT 1") + + try: + cursor.execute("SELECT dpi FROM users LIMIT 1") + except sqlite3.OperationalError: + cursor.execute("ALTER TABLE users ADD COLUMN dpi INTEGER DEFAULT 100") + + try: + cursor.execute("SELECT color_rm_dpi FROM users LIMIT 1") + except sqlite3.OperationalError: + 
def cleanup_old_data():
    """Remove sessions, their image files, and generated PDFs older than one day.

    Rows whose ``persist`` flag is set are kept. Files in the output folder
    that no ``generated_pdfs`` row references are also removed once their
    modification time is older than the cutoff. File removal is best-effort:
    a file that is missing or cannot be deleted is skipped.
    """
    # Local import: the module header only brings in datetime and timedelta.
    from datetime import timezone

    print("Running cleanup of old data...")

    # The tables' created_at columns default to SQLite's CURRENT_TIMESTAMP,
    # which is UTC text in 'YYYY-MM-DD HH:MM:SS' form. The cutoff is built in
    # the same zone and format so the comparison is not skewed by the server's
    # UTC offset, and no datetime object is handed to sqlite3's implicit adapter.
    # NOTE(review): assumes no caller inserts created_at as local time — confirm.
    cutoff = datetime.now(timezone.utc) - timedelta(days=1)
    cutoff_sql = cutoff.strftime('%Y-%m-%d %H:%M:%S')
    cutoff_epoch = cutoff.timestamp()  # compared against os.path.getmtime()

    def remove_quietly(path):
        """Delete ``path``, ignoring files that are gone or not removable."""
        try:
            os.remove(path)
        except OSError:
            pass

    conn = get_db_connection()
    try:
        old_sessions = conn.execute(
            'SELECT id FROM sessions WHERE created_at < ? AND persist = 0',
            (cutoff_sql,),
        ).fetchall()

        for session in old_sessions:
            session_id = session['id']
            print(f"Deleting old session: {session_id}")

            images_to_delete = conn.execute(
                'SELECT filename, processed_filename FROM images WHERE session_id = ?',
                (session_id,),
            ).fetchall()
            for img in images_to_delete:
                if img['filename']:
                    remove_quietly(os.path.join(current_app.config['UPLOAD_FOLDER'], img['filename']))
                if img['processed_filename']:
                    remove_quietly(os.path.join(current_app.config['PROCESSED_FOLDER'], img['processed_filename']))

            conn.execute('DELETE FROM questions WHERE session_id = ?', (session_id,))
            conn.execute('DELETE FROM images WHERE session_id = ?', (session_id,))
            conn.execute('DELETE FROM sessions WHERE id = ?', (session_id,))

        old_pdfs = conn.execute(
            'SELECT id, filename FROM generated_pdfs WHERE created_at < ? AND persist = 0',
            (cutoff_sql,),
        ).fetchall()
        for pdf in old_pdfs:
            pdf_id, pdf_filename = pdf['id'], pdf['filename']
            print(f"Deleting old generated PDF: {pdf_filename}")
            remove_quietly(os.path.join(current_app.config['OUTPUT_FOLDER'], pdf_filename))
            conn.execute('DELETE FROM generated_pdfs WHERE id = ?', (pdf_id,))

        # Sweep files in the output folder that no remaining row references.
        db_filenames = {row['filename'] for row in conn.execute('SELECT filename FROM generated_pdfs').fetchall()}
        output_folder = current_app.config['OUTPUT_FOLDER']
        for filename in os.listdir(output_folder):
            if filename in db_filenames:
                continue
            file_path = os.path.join(output_folder, filename)
            try:
                file_mtime = os.path.getmtime(file_path)
            except OSError:
                # The file disappeared between listdir() and the stat call.
                continue
            if file_mtime < cutoff_epoch:
                print(f"Deleting old, orphaned PDF: {filename}")
                remove_quietly(file_path)

        conn.commit()
    finally:
        # Release the connection even when a query or listdir() fails.
        conn.close()
    print("Cleanup finished.")
def get_all_descendant_folder_ids(conn, folder_id, user_id=None):
    """Return the IDs of every folder nested below ``folder_id``.

    The ``folders`` table is walked level by level: direct children come
    first, then grandchildren, and so on. Each ID appears once.

    Args:
        conn: Open database connection whose rows support access by column
            name (``row['id']``).
        folder_id: ID of the folder whose descendants are wanted. It is not
            included in the result.
        user_id: When truthy, only folders owned by this user are followed;
            otherwise ownership is ignored.

    Returns:
        A list of descendant folder IDs, empty when there are none.
    """
    if user_id:
        query = 'SELECT id FROM folders WHERE parent_id = ? AND user_id = ?'
        extra_params = (user_id,)
    else:
        query = 'SELECT id FROM folders WHERE parent_id = ?'
        extra_params = ()

    descendants = []
    # Track visited folders so each one is expanded exactly once. Extending a
    # list while looping over it would re-expand nested folders and return
    # duplicate IDs; the set also stops a corrupt parent cycle from looping
    # forever.
    seen = {folder_id}
    position = 0
    pending = [folder_id]
    while position < len(pending):
        parent_id = pending[position]
        position += 1
        rows = conn.execute(query, (parent_id,) + extra_params).fetchall()
        for row in rows:
            child_id = row['id']
            if child_id in seen:
                continue
            seen.add(child_id)
            descendants.append(child_id)
            pending.append(child_id)
    return descendants
NVIDIA_API_KEY="your-nvidia-key" +export OPENROUTER_API_KEY="your-openrouter-key" +``` + +#### Method 3: Mixed (Both Work Together) +```bash +# These will all be combined into the pool +export GEMINI_API_KEY="key-1" +export GEMINI_API_KEY_1="key-2" +export GEMINI_API_KEY_2="key-3" +# Result: 3 keys total (duplicates are automatically removed) +``` + +### Supported Services + +| Service | Environment Variable Pattern | Used For | +|---------|----------------------------|----------| +| `nvidia` | `NVIDIA_API_KEY` or `NVIDIA_API_KEY_1`, `NVIDIA_API_KEY_2`, etc. | OCR processing | +| `gemini` | `GEMINI_API_KEY`, `GOOGLE_API_KEY`, or numbered variants | Question classification, Q&A extraction | +| `openrouter` | `OPENROUTER_API_KEY` or `OPENROUTER_API_KEY_1`, etc. | Amazon Nova classification | + +## How It Works + +### 1. Key Rotation +Keys are automatically rotated using round-robin: +``` +Request 1 → Key 1 +Request 2 → Key 2 +Request 3 → Key 3 +Request 4 → Key 1 (back to start) +``` + +### 2. Failure Handling +When a key fails: +- Failure count is incremented +- After **3 consecutive failures**, the key is **blocked for 5 minutes** +- System automatically switches to next available key +- After cooldown period, key is automatically unblocked + +### 3. Success Tracking +When a key succeeds: +- Success count is incremented +- Failure count is reset to 0 +- Key is marked as available +- System rotates to next key for load balancing + +## Usage in Code + +### Automatic (Already Integrated) + +The API Key Manager is already integrated into: +- ✅ `gemini_classifier.py` - Gemini question classification +- ✅ `nova_classifier.py` - Nova question classification +- ✅ `processing.py` - NVIDIA OCR API + +**No code changes needed!** Just set up multiple API keys and the system handles the rest.
+ +### Manual Usage (Advanced) + +If you need to add API key management to other modules: + +```python +from api_key_manager import get_api_key_manager + +# Get the manager instance +manager = get_api_key_manager() + +# Get an API key +api_key, key_index = manager.get_key('gemini') + +if api_key: + try: + # Make your API call + response = make_api_call(api_key) + + # Mark as successful + manager.mark_success('gemini', key_index) + + except Exception as e: + # Mark as failed (will block after 3 failures) + manager.mark_failure('gemini', key_index) +else: + print("No API keys available!") +``` + +## Monitoring + +### Get Service Status + +```python +from api_key_manager import get_api_key_manager + +manager = get_api_key_manager() + +# Get status for one service +status = manager.get_service_status('gemini') +print(f"Available keys: {status['available_keys']}/{status['total_keys']}") +print(f"Blocked keys: {status['blocked_keys']}") + +# Get status for all services +all_status = manager.get_all_services_status() +for service, info in all_status.items(): + print(f"{service}: {info['available_keys']}/{info['total_keys']} keys available") +``` + +### Example Output +```json +{ + "service": "gemini", + "available": true, + "total_keys": 3, + "available_keys": 2, + "blocked_keys": 1, + "keys": [ + { + "index": 0, + "is_available": true, + "is_blocked": false, + "failure_count": 0, + "total_requests": 15, + "success_rate": 100.0, + "blocked_until": null + }, + { + "index": 1, + "is_available": true, + "is_blocked": false, + "failure_count": 0, + "total_requests": 12, + "success_rate": 100.0, + "blocked_until": null + }, + { + "index": 2, + "is_available": false, + "is_blocked": true, + "failure_count": 3, + "total_requests": 8, + "success_rate": 62.5, + "blocked_until": "2025-12-08T04:30:00.000000" + } + ] +} +``` + +## Configuration Options + +### Block Duration + +By default, keys are blocked for **5 minutes** after 3 failures. 
You can customize this: + +```python +# Block for 10 minutes instead +manager.mark_failure('gemini', key_index, block_duration_minutes=10) +``` + +### Failure Threshold + +The failure threshold is currently hardcoded to **3 consecutive failures**. This is defined in `api_key_manager.py` in the `mark_failure()` method: + +```python +if self.failure_count >= 3: + self.is_blocked = True +``` + +## Troubleshooting + +### Problem: "No API keys available" + +**Cause:** All keys are blocked or no keys are configured. + +**Solution:** +1. Check environment variables are set correctly +2. Wait for cooldown period (5 minutes) +3. Manually reset the service: + ```python + manager.reset_service('gemini') + ``` + +### Problem: Keys getting blocked frequently + +**Cause:** Rate limiting or invalid API keys. + +**Solution:** +1. Check API key validity +2. Verify rate limits with your API provider +3. Add more API keys to distribute load +4. Increase block duration to avoid rapid retries + +### Problem: Not using multiple keys even though they're configured + +**Cause:** Check if keys are being loaded correctly. + +**Solution:** +```python +manager = get_api_key_manager() +status = manager.get_service_status('gemini') +print(f"Total keys loaded: {status['total_keys']}") +``` + +## Best Practices + +1. **Use at least 2-3 keys per service** for better reliability +2. **Monitor success rates** to identify problematic keys +3. **Stagger API requests** to avoid hitting rate limits +4. **Keep backup keys from different accounts** if possible +5. 
**Test keys periodically** to ensure they're still valid + +## Logging + +The API Key Manager logs important events: + +``` +INFO: Loaded API keys: NVIDIA=2, Gemini=3, OpenRouter=2 +INFO: Registered 3 API key(s) for service: gemini +DEBUG: Using API key 1/3 for gemini +DEBUG: API key 1 for gemini marked as successful +WARNING: API key 2 for gemini marked as failed +WARNING: API key for gemini blocked until 2025-12-08 04:30:00 after 3 failures +INFO: API key for gemini unblocked after cooldown period +``` + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────┐ +│ API Key Manager │ +├─────────────────────────────────────────────────────────────┤ +│ │ +│ Service: nvidia Service: gemini Service: openrouter │ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ +│ │ Key 1 ✓ │ │ Key 1 ✓ │ │ Key 1 ✓ │ │ +│ │ Key 2 ✓ │ │ Key 2 ✓ │ │ Key 2 ✓ │ │ +│ └──────────────┘ │ Key 3 ✗ │ └──────────────┘ │ +│ │ (blocked) │ │ +│ └──────────────┘ │ +│ │ +│ Features: │ +│ • Round-robin rotation │ +│ • Automatic failover │ +│ • Failure tracking │ +│ • Auto-recovery after cooldown │ +│ │ +└─────────────────────────────────────────────────────────────┘ + │ │ │ + ▼ ▼ ▼ + processing.py gemini_classifier.py nova_classifier.py + (NVIDIA OCR) (Gemini AI) (Amazon Nova) +```
+ +## Future Enhancements + +Potential improvements for the API Key Manager: + +- [ ] Web dashboard for monitoring key status +- [ ] Configurable failure threshold per service +- [ ] Exponential backoff for blocked keys +- [ ] API key health checks +- [ ] Cost tracking per key +- [ ] Rate limit detection and adaptive throttling +- [ ] Database persistence for key statistics +- [ ] Email alerts when all keys are blocked +- [ ] Integration with settings page for user-visible status diff --git a/docs/API_USAGE_LIST.md b/docs/API_USAGE_LIST.md new file mode 100644 index 0000000000000000000000000000000000000000..8f137baa60172ffd29d413a490e25f42e368a92d --- /dev/null +++ b/docs/API_USAGE_LIST.md @@ -0,0 +1,167 @@ +# API Usage Inventory + +This document lists all external API integrations in the application. + +## 1. NVIDIA NIM API +**Purpose:** OCR (Optical Character Recognition) for extracting text from images + +**API Key Required:** `NVIDIA_API_KEY` + +**Endpoints:** +- OCR: `https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr-v1` +- Chat/Parser: `https://integrate.api.nvidia.com/v1/chat/completions` + +**Used In:** +- `config.py` - Configuration setup +- `processing.py` - `call_nim_ocr_api()` function for OCR +- `routes.py` - Question number extraction and redaction features +- `redact.py` - Picture redaction in images +- `test.sh` - Testing script +- `templates/question_entry_v2.html` - Frontend OCR feature + +**Features:** +- Automatic question number extraction from cropped images +- Text detection and OCR processing +- Image redaction for removing pictures from scanned documents + +--- + +## 2. 
Google Gemini API +**Purpose:** AI-powered question classification and question-answer extraction + +**API Key Required:** `GEMINI_API_KEY` or `GOOGLE_API_KEY` + +**Endpoints:** +- `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-lite:generateContent` + +**Used In:** +- `gemini_classifier.py` - `classify_questions_with_gemini()` - Classifies questions into subjects and chapters +- `gemini_subjective.py` - Subjective question generation +- `qtab_routes.py` - `process_image_for_questions()` - Extracts question-answer pairs from images +- `classifier_routes.py` - Auto-classification of cropped questions (via user setting) +- `neetprep.py` - NeetPrep question classification (via user setting) + +**Features:** +- Automatic question classification by subject and NCERT chapter +- Question-answer pair extraction from answer key images +- Subjective question generation +- Model: `gemini-2.0-flash-lite` + +--- + +## 3. OpenRouter API (Amazon Nova) +**Purpose:** Alternative AI model for question classification + +**API Key Required:** `OPENROUTER_API_KEY` + +**Endpoints:** +- `https://openrouter.ai/api/v1/chat/completions` + +**Used In:** +- `nova_classifier.py` - `classify_questions_with_nova()` - Classifies questions using Amazon Nova model +- `classifier_routes.py` - Auto-classification (when user selects Nova model) +- `neetprep.py` - NeetPrep question classification (when user selects Nova model) +- `test.py` - Testing script for Nova API + +**Features:** +- Question classification by subject and NCERT chapter +- Alternative to Gemini classifier +- Model: `amazon/nova-2-lite-v1:free` +- User-selectable via Settings page + +--- + +## 4. 
NeetPrep GraphQL API +**Purpose:** Fetch questions and test attempts from NeetPrep platform + +**API Key Required:** None (uses session/headers) + +**Endpoints:** +- `https://www.neetprep.com/graphql` + +**Used In:** +- `neetprep.py` - `run_hardcoded_query()` function + +**Features:** +- Fetch user test attempts +- Get incorrect question IDs +- Retrieve question details (text, options, correct answer, level, topics) +- Batch processing of questions + +**GraphQL Queries:** +- `GetAttempts` - Fetch test attempts +- `GetIncorrectIds` - Get incorrect question IDs from attempts +- `GetQuestionDetails` - Retrieve full question data + +--- + +## 5. External Resource Downloads +**Purpose:** Download files from external sources + +**Used In:** +- `routes.py` - Download PDFs and images from URLs +- `cli.py` - CLI download functionality +- `utils.py` - Download Arial font from GitHub +- `json_processor.py` / `json_processor_v3.py` - Download images from URLs for PDF generation + +**Endpoints (Examples):** +- GitHub: `https://github.com/kavin808/arial.ttf/raw/refs/heads/master/arial.ttf` +- User-provided PDF/image URLs + +--- + +## API Key Summary + +| Environment Variable | Required For | Used By | +|---------------------|--------------|---------| +| `NVIDIA_API_KEY` | NVIDIA NIM OCR | processing.py, routes.py, redact.py, config.py | +| `GEMINI_API_KEY` or `GOOGLE_API_KEY` | Google Gemini AI | gemini_classifier.py, gemini_subjective.py, qtab_routes.py | +| `OPENROUTER_API_KEY` | Amazon Nova via OpenRouter | nova_classifier.py, test.py | + +--- + +## User-Configurable API Settings + +### Classifier Model Selection +**Location:** Settings page (`templates/settings.html`) + +**Database Field:** `users.classifier_model` + +**Options:** +1. **Gemini Classifier (Default)** - Uses Google Gemini API +2. 
**Amazon Nova Lite** - Uses OpenRouter API + +**Affects:** +- `classifier_routes.py` - Auto-classification of cropped questions +- `neetprep.py` - NeetPrep question classification + +Users can choose their preferred AI model for question classification based on: +- API key availability +- Model performance preferences +- Cost considerations + +--- + +## Rate Limiting & Timeouts + +### Configured Timeouts: +- NVIDIA NIM OCR: 300 seconds (5 minutes) +- Gemini API: 300 seconds (5 minutes) +- Nova API: 300 seconds (5 minutes) +- NeetPrep GraphQL: 30 seconds +- Font download: 30 seconds + +### Batch Processing: +- Classifier batch size: 7 questions per batch +- Wait time between batches: + - Classifier routes: 5 seconds + - NeetPrep: 6 seconds + +--- + +## Notes + +1. **API Key Storage:** All API keys are stored as environment variables, not in the database +2. **Error Handling:** All API calls include error handling with logging +3. **Fallback Behavior:** If APIs are unavailable, features gracefully disable with user notifications +4. **Security:** API keys are never exposed in templates or client-side code diff --git a/docs/API_v3.md b/docs/API_v3.md new file mode 100644 index 0000000000000000000000000000000000000000..e2ffb9001873bfc23805929414982b1ab9dcb4e7 --- /dev/null +++ b/docs/API_v3.md @@ -0,0 +1,197 @@ +# JSON Upload API v3.0 + +## Endpoint +`POST /json_upload_v3` + +## Description +This endpoint allows PWDLV3 (or any compatible client) to submit test data in a standardized JSON v3.0 format to the Report-Generator. The data includes test metadata, configuration for PDF generation, and detailed information about each question, including image URLs. Report-Generator will validate the schema, download images in parallel, store the data, and optionally generate a PDF or provide an edit URL. 
+ +## Headers +* `Content-Type: application/json` (Required) +* `Authorization: Bearer <token>` (Optional, if authentication is enabled on Report-Generator) + +## Request Body +The request body must be a JSON object conforming to the following schema: + +```json +{ + "type": "object", + "properties": { + "version": { + "type": "string", + "const": "3.0", + "description": "API version, must be '3.0'" + }, + "source": { + "type": "string", + "description": "Source of the data, e.g., 'pwdlv3'", + "default": "manual" + }, + "test_name": { + "type": "string", + "description": "Name of the test" + }, + "test_id": { + "type": "string", + "description": "Unique ID of the test from the source system" + }, + "test_mapping_id": { + "type": "string", + "description": "Unique ID for mapping purposes, often same as test_id or a derivative" + }, + "metadata": { + "type": "object", + "patternProperties": { + ".*": { "type": "string" } + }, + "description": "Arbitrary key-value metadata for the session" + }, + "config": { + "type": "object", + "properties": { + "statuses_to_include": { + "type": "array", + "items": { "type": "string", "enum": ["wrong", "unattempted", "correct"] }, + "description": "Question statuses to include in generated reports" + }, + "layout": { + "type": "object", + "properties": { + "images_per_page": { "type": "integer", "minimum": 1 }, + "orientation": { "type": "string", "enum": ["portrait", "landscape"] } + }, + "required": ["images_per_page", "orientation"] + } + }, + "required": ["statuses_to_include", "layout"] + }, + "questions": { + "type": "array", + "items": { + "type": "object", + "properties": { + "question_number": { "type": "string", "description": "Display number for the question" }, + "image_url": { "type": "string", "format": "uri", "description": "URL of the question image" }, + "status": { "type": "string", "enum": ["wrong", "unattempted", "correct"], "description": "User's attempt status" }, + "marked_solution": { "type": "string", 
"description": "User's marked option/answer" }, + "correct_solution": { "type": "string", "description": "Correct option/answer" }, + "subject": { "type": "string", "description": "Subject of the question" }, + "chapter": { "type": "string", "description": "Chapter of the question" }, + "topic": { "type": "string", "description": "Topic of the question" }, + "time_taken": { "type": "integer", "minimum": 0, "description": "Time taken by user in seconds" } + }, + "required": ["question_number", "image_url", "status", "marked_solution", "correct_solution", "subject", "time_taken"] + }, + "minItems": 1 + }, + "view": { + "type": "boolean", + "description": "If true, Report-Generator will auto-generate PDF; if false, returns edit URL." + } + }, + "required": ["version", "source", "test_name", "test_id", "test_mapping_id", "config", "questions", "view"] +} +``` + +## Response + +### Success Response (HTTP 200 OK) +```json +{ + "status": "success", + "message": "Data processed successfully", + "session_id": "uuid-of-new-session", + "edit_url": "/question_entry_v2/uuid-of-new-session", + "pdf_url": "/view_pdf/uuid-of-new-session.pdf" // Only if 'view' was true +} +``` + +### Error Response (HTTP 400 Bad Request / 500 Internal Server Error) +```json +{ + "status": "error", + "message": "Detailed error description, e.g., 'Schema validation failed: Missing required field test_id'", + "errors": [...] // Optional: specific validation errors +} +``` + +## Examples + +### Curl Example: Submit Test Data for Manual Review +This example sends a minimal payload for a single test, opting for manual review in Report-Generator (i.e., `view: false`). 
+ +```bash +curl -X POST "http://localhost:5000/json_upload_v3" \ + -H "Content-Type: application/json" \ + -d '{ + "version": "3.0", + "source": "pwdlv3", + "test_name": "Physics Midterm", + "test_id": "PHY101-MID-2024", + "test_mapping_id": "PHY101-MID-2024-STUDENT001", + "metadata": { + "student_id": "STU001", + "attempt_date": "2024-11-01" + }, + "config": { + "statuses_to_include": ["wrong", "unattempted"], + "layout": { "images_per_page": 4, "orientation": "portrait" } + }, + "questions": [ + { + "question_number": "1", + "image_url": "https://example.com/question1.png", + "status": "wrong", + "marked_solution": "B", + "correct_solution": "C", + "subject": "Physics", + "time_taken": 90 + }, + { + "question_number": "2", + "image_url": "https://example.com/question2.png", + "status": "unattempted", + "marked_solution": "", + "correct_solution": "A", + "subject": "Physics", + "time_taken": 0 + } + ], + "view": false + }' +``` + +### Curl Example: Submit Test Data for Auto-PDF Generation +This example sends a similar payload but instructs Report-Generator to automatically generate and save the PDF report (`view: true`). 
+ +```bash +curl -X POST "http://localhost:5000/json_upload_v3" \ + -H "Content-Type: application/json" \ + -d '{ + "version": "3.0", + "source": "pwdlv3", + "test_name": "Chemistry Final", + "test_id": "CHM202-FIN-2024", + "test_mapping_id": "CHM202-FIN-2024-STUDENT002", + "metadata": { + "student_id": "STU002", + "attempt_date": "2024-12-05" + }, + "config": { + "statuses_to_include": ["wrong", "unattempted"], + "layout": { "images_per_page": 6, "orientation": "landscape" } + }, + "questions": [ + { + "question_number": "1", + "image_url": "https://example.com/chem_q1.png", + "status": "wrong", + "marked_solution": "D", + "correct_solution": "B", + "subject": "Chemistry", + "time_taken": 110 + } + ], + "view": true + }' +``` \ No newline at end of file diff --git a/docs/SETUP_MULTIPLE_API_KEYS.md b/docs/SETUP_MULTIPLE_API_KEYS.md new file mode 100644 index 0000000000000000000000000000000000000000..53d06556275a4a35b2bf11b6f605bf1c4903f1fe --- /dev/null +++ b/docs/SETUP_MULTIPLE_API_KEYS.md @@ -0,0 +1,330 @@ +# How to Use Multiple API Keys + +## Quick Setup Guide + +### Step 1: Set Environment Variables + +You can set multiple API keys. The system loads them in order: +- `GEMINI_API_KEY` → **Index 0** (base key) +- `GEMINI_API_KEY_1` → **Index 1** +- `GEMINI_API_KEY_2` → **Index 2** +- And so on... + +#### For Gemini API (Google AI) + +```bash +# Linux/Mac +export GEMINI_API_KEY="AIzaSyAbc123..." # Index 0 (base) +export GEMINI_API_KEY_1="AIzaSyDef456..." # Index 1 +export GEMINI_API_KEY_2="AIzaSyGhi789..." # Index 2 +export GEMINI_API_KEY_3="AIzaSyJkl012..." # Index 3 + +# Windows (Command Prompt) +set GEMINI_API_KEY_1=AIzaSyAbc123... +set GEMINI_API_KEY_2=AIzaSyDef456... +set GEMINI_API_KEY_3=AIzaSyGhi789... + +# Windows (PowerShell) +$env:GEMINI_API_KEY_1="AIzaSyAbc123..." +$env:GEMINI_API_KEY_2="AIzaSyDef456..." +$env:GEMINI_API_KEY_3="AIzaSyGhi789..." +``` + +#### For NVIDIA API + +```bash +# Linux/Mac +export NVIDIA_API_KEY_1="nvapi-abc123..." 
+export NVIDIA_API_KEY_2="nvapi-def456..." +export NVIDIA_API_KEY_3="nvapi-ghi789..." + +# Windows (Command Prompt) +set NVIDIA_API_KEY_1=nvapi-abc123... +set NVIDIA_API_KEY_2=nvapi-def456... + +# Windows (PowerShell) +$env:NVIDIA_API_KEY_1="nvapi-abc123..." +$env:NVIDIA_API_KEY_2="nvapi-def456..." +``` + +#### For OpenRouter API (Amazon Nova) + +```bash +# Linux/Mac +export OPENROUTER_API_KEY_1="sk-or-v1-abc123..." +export OPENROUTER_API_KEY_2="sk-or-v1-def456..." +export OPENROUTER_API_KEY_3="sk-or-v1-ghi789..." + +# Windows (Command Prompt) +set OPENROUTER_API_KEY_1=sk-or-v1-abc123... +set OPENROUTER_API_KEY_2=sk-or-v1-def456... + +# Windows (PowerShell) +$env:OPENROUTER_API_KEY_1="sk-or-v1-abc123..." +$env:OPENROUTER_API_KEY_2="sk-or-v1-def456..." +``` + +### Step 2: Using .env File (Recommended - Already Configured!) + +✅ **Good news:** The app already has .env support built-in! + +Just create a `.env` file in your project root: + +```bash +# .env file +# Gemini API Keys (get from: https://aistudio.google.com/app/apikey) +GEMINI_API_KEY=AIzaSyAbc123... # Index 0 (base key) +GEMINI_API_KEY_1=AIzaSyDef456... # Index 1 +GEMINI_API_KEY_2=AIzaSyGhi789... # Index 2 + +# NVIDIA API Keys (get from: https://build.nvidia.com/) +NVIDIA_API_KEY=nvapi-abc123... # Index 0 (base key) +NVIDIA_API_KEY_1=nvapi-def456... # Index 1 + +# OpenRouter API Keys (get from: https://openrouter.ai/keys) +OPENROUTER_API_KEY=sk-or-v1-abc123... # Index 0 (base key) +OPENROUTER_API_KEY_1=sk-or-v1-def456... # Index 1 +``` + +**That's it!** Just run the app normally: + +```bash +python3 run.py +``` + +The .env file is automatically loaded. No extra steps needed! + +**Quick Start:** +```bash +# 1. Copy the example file +cp .env.example .env + +# 2. Edit .env and add your API keys +nano .env + +# 3. 
Run the app +python3 run.py +``` + +### Step 3: Verify Keys Are Loaded + +Run this to check if your keys are loaded correctly: + +```bash +python3 -c " +from api_key_manager import get_api_key_manager + +manager = get_api_key_manager() +status = manager.get_all_services_status() + +for service, info in status.items(): + print(f'{service.upper()}: {info[\"total_keys\"]} key(s) loaded') +" +``` + +Expected output: +``` +NVIDIA: 2 key(s) loaded +GEMINI: 3 key(s) loaded +OPENROUTER: 2 key(s) loaded +``` + +--- + +## How It Works + +### Automatic Rotation + +Once you have multiple keys configured, the system automatically: + +1. **Rotates** through them (round-robin) +2. **Fails over** when one key fails +3. **Blocks** keys that fail 3 times (for 5 minutes) +4. **Unblocks** keys after cooldown +5. **Tracks** success rates for each key + +### Example Flow + +Let's say you have 3 Gemini keys configured: + +``` +Request 1 → Uses Key 1 ✓ +Request 2 → Uses Key 2 ✓ +Request 3 → Uses Key 3 ✓ +Request 4 → Uses Key 1 ✓ (rotation back to start) +Request 5 → Uses Key 2 ✗ (fails - rate limit) +Request 6 → Uses Key 3 ✓ (automatically switched) +Request 7 → Uses Key 1 ✓ +Request 8 → Uses Key 2 ✗ (fails again - 2nd failure) +Request 9 → Uses Key 3 ✓ (automatically switched) +Request 10 → Uses Key 2 ✗ (fails again - 3rd failure, BLOCKED) +Request 11 → Uses Key 3 ✓ (Key 2 is skipped) +Request 12 → Uses Key 1 ✓ +Request 13 → Uses Key 3 ✓ (Key 2 still blocked) +... 5 minutes later ... +Request N → Uses Key 2 ✓ (unblocked and back in rotation) +``` + +--- + +## Getting API Keys + +### Gemini API (Google AI) +1. Go to https://aistudio.google.com/app/apikey +2. Click "Create API Key" +3. Copy the key (starts with `AIzaSy...`) +4. **Tip:** Create multiple keys from different Google accounts for more quota + +### NVIDIA API +1. Go to https://build.nvidia.com/ +2. Sign in and navigate to API Keys +3. Generate a new API key +4. 
Copy the key (starts with `nvapi-...`) + +### OpenRouter API +1. Go to https://openrouter.ai/keys +2. Sign up and create an API key +3. Copy the key (starts with `sk-or-v1-...`) +4. **Tip:** OpenRouter gives free credits for Nova model + +--- + +## Common Scenarios + +### Scenario 1: Maximize Free Tier Usage + +If you have multiple Google accounts, create one Gemini API key from each: + +```bash +export GEMINI_API_KEY="key-from-account-1" # Index 0 +export GEMINI_API_KEY_1="key-from-account-2" # Index 1 +export GEMINI_API_KEY_2="key-from-account-3" # Index 2 +export GEMINI_API_KEY_3="key-from-account-4" # Index 3 +``` + +This gives you 4x the free tier quota! + +### Scenario 2: Paid + Free Keys + +Mix paid and free keys: + +```bash +export GEMINI_API_KEY="paid-key-with-high-quota" # Index 0 - tried first +export GEMINI_API_KEY_1="free-key-1" # Index 1 - backup +export GEMINI_API_KEY_2="free-key-2" # Index 2 - backup +``` + +The system will rotate through all of them, maximizing your available quota. + +### Scenario 3: Single Key (Backward Compatible) + +If you only have one key, the old method still works: + +```bash +export GEMINI_API_KEY="your-single-key" +``` + +The system will use this single key without rotation. + +--- + +## Troubleshooting + +### Problem: Keys not being loaded + +**Check:** +1. Environment variables are set in the same terminal/session where you run the app +2. Variable names match exactly (case-sensitive) +3. No extra spaces in variable values + +**Test:** +```bash +# Linux/Mac +echo $GEMINI_API_KEY_1 +echo $GEMINI_API_KEY_2 + +# Windows (Command Prompt) +echo %GEMINI_API_KEY_1% +echo %GEMINI_API_KEY_2% + +# Windows (PowerShell) +echo $env:GEMINI_API_KEY_1 +echo $env:GEMINI_API_KEY_2 +``` + +### Problem: Only first key is being used + +**Likely cause:** Other keys aren't set properly. 
+ +**Fix:** Verify all keys are loaded: +```python +import os +print("Key 1:", os.environ.get('GEMINI_API_KEY_1')) +print("Key 2:", os.environ.get('GEMINI_API_KEY_2')) +print("Key 3:", os.environ.get('GEMINI_API_KEY_3')) +``` + +### Problem: All keys get blocked quickly + +**Causes:** +- Invalid API keys +- Insufficient quota/rate limits +- API service issues + +**Fix:** +1. Verify each key works individually +2. Check your quota limits with the API provider +3. Add more keys to distribute the load +4. Increase wait times between requests + +--- + +## Best Practices + +✅ **Use at least 2-3 keys per service** for reliability +✅ **Get keys from different accounts** to maximize free tier +✅ **Keep backup keys** from different providers if possible +✅ **Monitor key usage** to identify which ones work best +✅ **Store keys securely** in .env file (add to .gitignore) +✅ **Don't commit keys to git** - use environment variables +✅ **Rotate keys periodically** for security + +--- + +## Advanced: Persistent Configuration + +To make environment variables persist across reboots: + +### Linux/Mac - Add to ~/.bashrc or ~/.zshrc: +```bash +export GEMINI_API_KEY_1="..." +export GEMINI_API_KEY_2="..." +export GEMINI_API_KEY_3="..." +``` + +Then reload: `source ~/.bashrc` + +### Windows - Use System Environment Variables: +1. Search for "Environment Variables" in Start Menu +2. Click "Edit system environment variables" +3. Click "Environment Variables" button +4. Add your keys under "User variables" + +--- + +## Summary + +**To use multiple API keys:** + +1. **Set numbered environment variables:** + - `GEMINI_API_KEY_1`, `GEMINI_API_KEY_2`, etc. + - `NVIDIA_API_KEY_1`, `NVIDIA_API_KEY_2`, etc. + - `OPENROUTER_API_KEY_1`, `OPENROUTER_API_KEY_2`, etc. + +2. 
**That's it!** The system automatically: + - Loads all keys + - Rotates through them + - Handles failures + - Maximizes availability + +No code changes needed - just set the environment variables and the API Key Manager handles everything! diff --git a/drive_routes.py b/drive_routes.py new file mode 100644 index 0000000000000000000000000000000000000000..921b9090d15531806e5e01c400f532acd6f42140 --- /dev/null +++ b/drive_routes.py @@ -0,0 +1,359 @@ +import os +import shutil +import gdown +from flask import Blueprint, render_template, request, jsonify, current_app, send_from_directory, url_for, redirect, session +from flask_login import login_required, current_user +from database import get_db_connection +from datetime import datetime +import threading +import re +import json + +# Allow OAuth over HTTP for local testing +os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1' + +from gdrive_service import get_drive_service, create_flow, list_drive_files, download_file_to_stream, get_file_metadata + +drive_bp = Blueprint('drive', __name__) + +DRIVE_SYNC_FOLDER = 'drive_sync' + +def extract_drive_id(url): + # Extracts Drive ID (File or Folder) - simplified regex for ~25+ chars + match = re.search(r'[-\w]{25,}', url) + return match.group(0) if match else None + +def get_sync_folder_path(source_name=None): + base = os.path.join(current_app.config['OUTPUT_FOLDER'], DRIVE_SYNC_FOLDER) + if not os.path.exists(base): + os.makedirs(base) + if source_name: + path = os.path.join(base, source_name) + if not os.path.exists(path): + os.makedirs(path) + return path + return base + +@drive_bp.route('/drive_manager') +@login_required +def drive_manager(): + conn = get_db_connection() + sources = conn.execute('SELECT * FROM drive_sources WHERE user_id = ? ORDER BY created_at DESC', (current_user.id,)).fetchall() + + # Get last 4 opened PDFs + recent_pdfs = conn.execute(''' + SELECT file_id, filename, opened_at + FROM pdf_access_history + WHERE user_id = ? 
+ ORDER BY opened_at DESC + LIMIT 4 + ''', (current_user.id,)).fetchall() + + conn.close() + + # Check Drive API Status + drive_connected = bool(current_user.google_token) + + return render_template('drive_manager.html', + sources=[dict(s) for s in sources], + drive_connected=drive_connected, + recent_pdfs=[dict(p) for p in recent_pdfs]) + +@drive_bp.route('/drive/connect') +@login_required +def connect_drive(): + try: + redirect_uri = 'http://localhost' + flow = create_flow(redirect_uri) + authorization_url, state = flow.authorization_url( + access_type='offline', + include_granted_scopes='true') + session['oauth_state'] = state + return render_template('drive_connect_manual.html', auth_url=authorization_url) + except FileNotFoundError: + return "client_secret.json not found. Please upload it to the app root via Settings.", 404 + except Exception as e: + return f"Error creating flow: {e}", 500 + +@drive_bp.route('/drive/manual_callback', methods=['POST']) +@login_required +def manual_callback(): + state = session.get('oauth_state') + full_url = request.form.get('full_url') + if not full_url: return "URL is required", 400 + try: + redirect_uri = 'http://localhost' + flow = create_flow(redirect_uri) + flow.fetch_token(authorization_response=full_url) + credentials = flow.credentials + token_json = credentials.to_json() + conn = get_db_connection() + conn.execute('UPDATE users SET google_token = ? WHERE id = ?', (token_json, current_user.id)) + conn.commit() + conn.close() + current_user.google_token = token_json + return redirect(url_for('drive.drive_manager')) + except Exception as e: + return f"Auth failed: {e}

Make sure you copied the full URL correctly.", 500 + +@drive_bp.route('/oauth2callback') +def oauth2callback(): + state = session.get('oauth_state') + if not state: return "Invalid state", 400 + try: + redirect_uri = url_for('drive.oauth2callback', _external=True) + flow = create_flow(redirect_uri) + flow.fetch_token(authorization_response=request.url) + credentials = flow.credentials + token_json = credentials.to_json() + conn = get_db_connection() + conn.execute('UPDATE users SET google_token = ? WHERE id = ?', (token_json, current_user.id)) + conn.commit() + conn.close() + current_user.google_token = token_json + return redirect(url_for('drive.drive_manager')) + except Exception as e: + return f"Auth failed: {e}", 500 + +@drive_bp.route('/drive/add', methods=['POST']) +@login_required +def add_source(): + name = request.form.get('name') + url = request.form.get('url') + if not name or not url: return jsonify({'error': 'Name and URL required'}), 400 + conn = get_db_connection() + try: + source_type = 'file' + if '/folders/' in url or 'drive/folders' in url: source_type = 'folder' + local_path = name.strip().replace(' ', '_') + conn.execute('INSERT INTO drive_sources (name, url, local_path, user_id, source_type) VALUES (?, ?, ?, ?, ?)', + (name, url, local_path, current_user.id, source_type)) + conn.commit() + return jsonify({'success': True}) + except Exception as e: + return jsonify({'error': str(e)}), 500 + finally: + conn.close() + +@drive_bp.route('/drive/delete/', methods=['POST']) +@login_required +def delete_source(id): + conn = get_db_connection() + source = conn.execute('SELECT * FROM drive_sources WHERE id = ?', (id,)).fetchone() + if not source or source['user_id'] != current_user.id: + conn.close() + return jsonify({'error': 'Unauthorized'}), 403 + conn.execute('DELETE FROM drive_sources WHERE id = ?', (id,)) + conn.commit() + conn.close() + try: + path = get_sync_folder_path(source['local_path']) + if os.path.exists(path): shutil.rmtree(path) + 
except Exception as e: + print(f"Error deleting folder: {e}") + return jsonify({'success': True}) + +def sync_task(source_id, user_id, app_config): + import sqlite3 + conn = sqlite3.connect('database.db') + conn.row_factory = sqlite3.Row + try: + source = conn.execute('SELECT * FROM drive_sources WHERE id = ?', (source_id,)).fetchone() + if not source: return + output_base = os.path.join(app_config['OUTPUT_FOLDER'], DRIVE_SYNC_FOLDER, source['local_path']) + if not os.path.exists(output_base): os.makedirs(output_base) + print(f"Syncing Drive: {source['name']} to {output_base}") + try: + gdown.download_folder(url=source['url'], output=output_base, quiet=False, use_cookies=False) + conn.execute('UPDATE drive_sources SET last_synced = CURRENT_TIMESTAMP WHERE id = ?', (source_id,)) + conn.commit() + print("Sync complete.") + except Exception as e: + print(f"GDown Error: {e}") + except Exception as e: + print(f"Sync Task Error: {e}") + finally: + conn.close() + +@drive_bp.route('/drive/sync/', methods=['POST']) +@login_required +def sync_source(id): + conn = get_db_connection() + source = conn.execute('SELECT * FROM drive_sources WHERE id = ?', (id,)).fetchone() + conn.close() + if not source or source['user_id'] != current_user.id: return jsonify({'error': 'Unauthorized'}), 403 + thread = threading.Thread(target=sync_task, args=(id, current_user.id, current_app.config.copy())) + thread.start() + return jsonify({'success': True, 'message': 'Sync started in background'}) + +@drive_bp.route('/drive/browse/') +@drive_bp.route('/drive/browse//') +@login_required +def browse_drive(source_id, subpath=''): + conn = get_db_connection() + source = conn.execute('SELECT * FROM drive_sources WHERE id = ?', (source_id,)).fetchone() + conn.close() + + if not source or source['user_id'] != current_user.id: return "Unauthorized", 403 + + # === API Upgrade Logic === + if current_user.google_token and not subpath: + drive_id = extract_drive_id(source['url']) + if drive_id: + # Pass 
source name as title + return redirect(url_for('drive.browse_drive_api', folder_id=drive_id, title=source['name'])) + # ========================= + + base_path = get_sync_folder_path(source['local_path']) + current_path = os.path.join(base_path, subpath) + + if not os.path.exists(current_path): + if source['source_type'] == 'file': pass + else: return "Path not found (Not synced yet). Click Sync Now in Manager.", 404 + + items = [] + if os.path.exists(current_path): + try: + for entry in os.scandir(current_path): + is_dir = entry.is_dir() + file_type = 'file' + if is_dir: file_type = 'folder' + elif entry.name.lower().endswith('.pdf'): file_type = 'pdf' + elif entry.name.lower().endswith(('.png', '.jpg', '.jpeg')): file_type = 'image' + + items.append({ + 'name': entry.name, + 'type': file_type, + 'path': os.path.join(subpath, entry.name).strip('/') + }) + except Exception as e: return f"Error listing files: {e}", 500 + + items.sort(key=lambda x: (x['type'] != 'folder', x['name'].lower())) + + if not items and source['source_type'] == 'file': + items.append({'name': 'Tap to Download & View', 'type': 'pdf', 'path': 'document.pdf'}) + + breadcrumbs = [] + if subpath: + parts = subpath.split('/') + built = '' + for part in parts: + built = os.path.join(built, part).strip('/') + breadcrumbs.append({'name': part, 'path': built}) + + return render_template('drive_browser.html', source=source, items=items, breadcrumbs=breadcrumbs, current_subpath=subpath) + +@drive_bp.route('/drive/file//') +@login_required +def view_drive_file(source_id, filepath): + conn = get_db_connection() + source = conn.execute('SELECT * FROM drive_sources WHERE id = ?', (source_id,)).fetchone() + conn.close() + if not source or source['user_id'] != current_user.id: return "Unauthorized", 403 + base_path = get_sync_folder_path(source['local_path']) + full_path = os.path.join(base_path, filepath) + + if not os.path.exists(full_path) and source['source_type'] == 'file': + try: + if not 
os.path.exists(base_path): os.makedirs(base_path) + gdown.download(url=source['url'], output=full_path, quiet=False, fuzzy=True) + except Exception as e: return f"Error downloading file: {e}", 500 + + if not os.path.exists(full_path): return "File not found.", 404 + if full_path.lower().endswith('.pdf'): + file_url = url_for('drive.serve_drive_file', source_id=source_id, filepath=os.path.basename(full_path)) + return render_template('pdfjs_viewer.html', pdf_url=file_url, pdf_title=os.path.basename(full_path)) + return send_from_directory(os.path.dirname(full_path), os.path.basename(full_path)) + +@drive_bp.route('/drive/raw//') +@login_required +def serve_drive_file(source_id, filepath): + conn = get_db_connection() + source = conn.execute('SELECT * FROM drive_sources WHERE id = ?', (source_id,)).fetchone() + conn.close() + if not source or source['user_id'] != current_user.id: return "Unauthorized", 403 + base_path = get_sync_folder_path(source['local_path']) + return send_from_directory(base_path, filepath) + +@drive_bp.route('/drive/api/list') +@drive_bp.route('/drive/api/list/') +@login_required +def api_list_files(folder_id='root'): + service = get_drive_service(current_user) + if not service: return jsonify({'error': 'Not connected'}), 401 + files, next_token = list_drive_files(service, folder_id) + file_list = [] + for f in files: + is_folder = f['mimeType'] == 'application/vnd.google-apps.folder' + icon = 'folder-fill text-warning' if is_folder else 'file-earmark-text text-secondary' + if f['mimeType'] == 'application/pdf': icon = 'file-earmark-pdf-fill text-danger' + elif 'image' in f['mimeType']: icon = 'file-earmark-image-fill text-info' + file_list.append({ + 'id': f['id'], + 'name': f['name'], + 'type': 'folder' if is_folder else 'file', + 'mimeType': f['mimeType'], + 'icon': icon, + 'size': f.get('size') + }) + return jsonify({'files': file_list, 'next_token': next_token}) + +@drive_bp.route('/drive/api/browse/') +@login_required +def 
# NOTE(review): the "<...>" URL variables appear to have been stripped from the
# route rules in the text under review (the views take arguments, yet the rules
# end in a bare "/"). The rule strings are reproduced exactly as shown --
# confirm them against the real file before applying this block.
@drive_bp.route('/drive/api/browse/')
@login_required
def browse_drive_api(folder_id):
    """Render the drive browser for a folder listed live through the Drive API.

    Args:
        folder_id: Drive folder id to list.

    Returns:
        The rendered ``drive_browser.html`` page, or a redirect to the drive
        manager when the current user has no usable Drive service.
    """
    service = get_drive_service(current_user)
    if not service:
        return redirect(url_for('drive.drive_manager'))

    title = request.args.get('title', 'My Drive')
    files, _next_token = list_drive_files(service, folder_id)
    items = []
    for f in files:
        mime_type = f['mimeType']
        # An image MIME type wins over every other classification.
        if 'image' in mime_type:
            f_type = 'image'
        elif mime_type == 'application/vnd.google-apps.folder':
            f_type = 'folder'
        elif mime_type == 'application/pdf':
            f_type = 'pdf'
        else:
            f_type = 'file'
        items.append({
            'name': f['name'],
            'type': f_type,
            'path': f['id'],
            'is_api': True,
        })
    return render_template('drive_browser.html', source={'id': 'api', 'name': title}, items=items, breadcrumbs=[], is_api=True)


@drive_bp.route('/drive/api/open/')
@login_required
def api_open_file(file_id):
    """Download a Drive file into the local cache and display it.

    PDFs are logged to ``pdf_access_history`` and shown in the pdf.js viewer;
    PNG/JPEG images are sent directly; anything else is only downloaded.

    Args:
        file_id: Drive id of the file to open.

    Returns:
        A Flask response, or a ``(message, status)`` tuple (401 not connected,
        404 unknown file, 500 on any failure while opening).
    """
    service = get_drive_service(current_user)
    if not service:
        return "Not connected", 401
    try:
        meta = get_file_metadata(service, file_id)
        if not meta:
            return "File not found", 404
        filename = meta['name']
        cache_dir = os.path.join(current_app.config['UPLOAD_FOLDER'], 'drive_cache')
        os.makedirs(cache_dir, exist_ok=True)

        from werkzeug.utils import secure_filename
        # Key the cache entry by file id as well as name: two Drive files may
        # share a name, and a name that sanitizes to '' would otherwise make
        # file_path point at the cache directory itself.
        safe_name = f"{secure_filename(file_id)}_{secure_filename(filename) or 'file'}"
        file_path = os.path.join(cache_dir, safe_name)

        if not os.path.exists(file_path):
            # Download to a temporary name and rename on success, so a failed
            # download never leaves a truncated file that later requests would
            # treat as a valid cache hit.
            partial_path = file_path + '.part'
            try:
                with open(partial_path, 'wb') as f:
                    download_file_to_stream(service, file_id, f)
                os.replace(partial_path, file_path)
            except Exception:
                if os.path.exists(partial_path):
                    os.remove(partial_path)
                raise

        if safe_name.lower().endswith('.pdf'):
            # Log PDF access to history
            conn = get_db_connection()
            try:
                conn.execute('''
                    INSERT INTO pdf_access_history (user_id, file_id, filename, source_type, opened_at)
                    VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP)
                ''', (current_user.id, file_id, filename, 'drive_api'))
                conn.commit()
            finally:
                conn.close()

            file_url = url_for('drive.serve_cache_file', filename=safe_name)
            return render_template('pdfjs_viewer.html', pdf_url=file_url, pdf_title=filename)
        if safe_name.lower().endswith(('.png', '.jpg', '.jpeg')):
            return send_from_directory(cache_dir, safe_name)
        return "File downloaded but type not supported for viewing.", 200
    except Exception as e:
        return f"Error opening file: {e}", 500


@drive_bp.route('/drive/cache/')
@login_required
def serve_cache_file(filename):
    """Send a previously downloaded file from the ``drive_cache`` folder.

    Args:
        filename: name of the cached file inside ``drive_cache``.
    """
    cache_dir = os.path.join(current_app.config['UPLOAD_FOLDER'], 'drive_cache')
    return send_from_directory(cache_dir, filename)
# Scopes required for Drive API
SCOPES = [
    'https://www.googleapis.com/auth/drive.readonly',
    'https://www.googleapis.com/auth/drive.metadata.readonly',
]


def get_drive_service(user):
    """Build a Drive v3 service object from the user's stored token.

    Args:
        user: object whose ``google_token`` attribute holds the authorized-user
            info as a JSON string (or a falsy value when not connected).

    Returns:
        The ``build('drive', 'v3', credentials=creds)`` service, or ``None``
        when the user has no token or the service cannot be built (the error
        is logged).
    """
    if not user.google_token:
        return None

    try:
        token_info = json.loads(user.google_token)
        creds = Credentials.from_authorized_user_info(token_info, SCOPES)
        return build('drive', 'v3', credentials=creds)
    except Exception as e:
        current_app.logger.error(f"Error building Drive service: {e}")
        return None


def create_flow(redirect_uri):
    """Create an OAuth2 ``Flow`` from ``client_secret.json`` in the app root.

    Args:
        redirect_uri: redirect URI to configure on the flow.

    Returns:
        The configured ``Flow`` object.

    Raises:
        FileNotFoundError: if ``client_secret.json`` is not in the app root.
    """
    client_secrets_file = os.path.join(current_app.root_path, 'client_secret.json')
    if not os.path.exists(client_secrets_file):
        raise FileNotFoundError("client_secret.json not found. Please upload it to the root directory.")

    return Flow.from_client_secrets_file(
        client_secrets_file,
        scopes=SCOPES,
        redirect_uri=redirect_uri,
    )


def _escape_drive_query_value(value):
    """Escape backslashes and single quotes for use inside a quoted Drive query value."""
    return str(value).replace('\\', '\\\\').replace("'", "\\'")


def list_drive_files(service, folder_id='root', page_token=None):
    """List the non-trashed files in a Drive folder.

    Args:
        service: Drive v3 service object.
        folder_id: id of the parent folder; defaults to ``'root'``.
        page_token: continuation token from a previous call, if any.

    Returns:
        ``(files, next_page_token)``; ``([], None)`` when the API call fails
        (the error is logged).
    """
    try:
        # folder_id can come from a URL; escape it so a quote in the value
        # cannot terminate the string literal and alter the query.
        parent = _escape_drive_query_value(folder_id)
        results = service.files().list(
            q=f"'{parent}' in parents and trashed = false",
            pageSize=50,
            pageToken=page_token,
            fields="nextPageToken, files(id, name, mimeType, iconLink, webViewLink, size, modifiedTime)",
            orderBy="folder,name"
        ).execute()
        return results.get('files', []), results.get('nextPageToken')
    except Exception as e:
        current_app.logger.error(f"Drive API List Error: {e}")
        return [], None


def get_file_metadata(service, file_id):
    """Fetch id, name, mimeType and size for one Drive file.

    Returns:
        The metadata dict, or ``None`` when the API call fails (the error is
        logged rather than silently discarded).
    """
    try:
        return service.files().get(fileId=file_id, fields="id, name, mimeType, size").execute()
    except Exception as e:
        current_app.logger.error(f"Drive API Metadata Error: {e}")
        return None


def download_file_to_stream(service, file_id, stream):
    """Downloads file content to a writeable stream."""
    from googleapiclient.http import MediaIoBaseDownload

    request = service.files().get_media(fileId=file_id)
    downloader = MediaIoBaseDownload(stream, request)
    done = False
    while not done:
        # next_chunk() returns (status, done); the status is not used here.
        _, done = downloader.next_chunk()
+ +**Available Subjects (Use these exact titles):** +- Biology +- Chemistry +- Physics +- Mathematics + +**Syllabus Chapters (Use these exact titles for the respective subjects):** + +--- +**1. BIOLOGY (Common for NEET & JEE)** + +**Class XI** +1. The Living World +2. Biological Classification +3. Plant Kingdom +4. Animal Kingdom +5. Morphology of Flowering Plants +6. Anatomy of Flowering Plants +7. Structural Organisation in Animals +8. Cell: The Unit of Life +9. Biomolecules +10. Cell Cycle and Cell Division +11. Photosynthesis in Higher Plants +12. Respiration in Plants +13. Plant Growth and Development +14. Breathing and Exchange of Gases +15. Body Fluids and Circulation +16. Excretory Products and their Elimination +17. Locomotion and Movement +18. Neural Control and Coordination +19. Chemical Coordination and Integration +20. Sexual Reproduction in Flowering Plants +21. Human Reproduction +22. Reproductive Health +23. Principles of Inheritance and Variation +24. Molecular Basis of Inheritance +25. Evolution +26. Health and Disease +27. Improvement in Food Production +28. Microbes in Human Welfare +29. Biotechnology - Principles and Processes +30. Biotechnology and Its Applications +31. Organisms and Populations +32. Ecosystem +33. Biodiversity and Its Conservation + +--- +**2. CHEMISTRY (Common for NEET & JEE)** + +**Class XI** +1. Some Basic Concepts of Chemistry +2. Structure of Atom +3. Classification of Elements and Periodicity in Properties +4. Chemical Bonding and Molecular Structure +5. States of Matter: Gases and Liquids +6. Thermodynamics +7. Equilibrium +8. Redox Reactions +9. Hydrogen +10. The s-Block Elements +11. The p-Block Elements (Group 13 and 14) +12. Organic Chemistry – Some Basic Principles and Techniques (GOC) +13. Hydrocarbons +14. Environmental Chemistry + +**Class XII** +1. The Solid State +2. Solutions +3. Electrochemistry +4. Chemical Kinetics +5. Surface Chemistry +6. 
General Principles and Processes of Isolation of Elements (Metallurgy) +7. The p-Block Elements (Group 15 to 18) +8. The d- and f- Block Elements +9. Coordination Compounds +10. Haloalkanes and Haloarenes +11. Alcohols, Phenols and Ethers +12. Aldehydes, Ketones and Carboxylic Acids +13. Amines +14. Biomolecules +15. Polymers +16. Chemistry in Everyday Life + +--- +**3. PHYSICS (Common for NEET & JEE)** + +**Class XI** +1. Units and Measurements +2. Motion in a Straight Line +3. Motion in a Plane +4. Laws of Motion +5. Work, Energy and Power +6. System of Particles and Rotational Motion +7. Gravitation +8. Mechanical Properties of Solids +9. Mechanical Properties of Fluids +10. Thermal Properties of Matter +11. Thermodynamics +12. Kinetic Theory +13. Oscillations +14. Waves + +**Class XII** +1. Electric Charges and Fields +2. Electrostatic Potential and Capacitance +3. Current Electricity +4. Moving Charges and Magnetism +5. Magnetism and Matter +6. Electromagnetic Induction +7. Alternating Current +8. Electromagnetic Waves +9. Ray Optics and Optical Instruments +10. Wave Optics +11. Dual Nature of Radiation and Matter +12. Atoms +13. Nuclei +14. Semiconductor Electronics: Materials, Devices and Simple Circuits +15. Communication Systems + +--- +**4. MATHEMATICS (For JEE Only)** + +**Class XI** +1. Sets +2. Relations and Functions +3. Trigonometric Functions +4. Principle of Mathematical Induction +5. Complex Numbers and Quadratic Equations +6. Linear Inequalities +7. Permutations and Combinations +8. Binomial Theorem +9. Sequences and Series +10. Straight Lines +11. Conic Sections +12. Introduction to Three Dimensional Geometry +13. Limits and Derivatives +14. Mathematical Reasoning +15. Statistics +16. Probability + +**Class XII** +1. Relations and Functions +2. Inverse Trigonometric Functions +3. Matrices +4. Determinants +5. Continuity and Differentiability +6. Application of Derivatives +7. Integrals +8. Application of Integrals +9. Differential Equations +10. 
Vector Algebra +11. Three Dimensional Geometry +12. Linear Programming +13. Probability + +--- + +**Classification Guidelines:** + +1. **Primary Classification**: Identify the single most relevant subject, and then the most relevant chapter(s) within that subject, that directly addresses the question's core concept. +2. **Multi-Chapter Questions**: If a question explicitly spans 2-3 distinct chapters, include all relevant chapters. +3. **Confidence Scoring** (0.0 to 1.0): + * **1.0**: Perfect match + * **0.8-0.9**: Strong match + * **0.5-0.7**: Moderate match + * **Below 0.5**: Avoid unless unavoidable. +4. **Non-Syllabus Questions**: If a question is not from any of the provided subjects/chapters, set `subject` to 'Unclassified' and `chapter_title` to 'Unclassified'. + +**Critical Requirements:** + +- Use ONLY the subject titles exactly as listed above, or 'Unclassified'. +- Use ONLY the chapter titles exactly as listed above, or 'Unclassified'. +- Preserve the original question text completely. +- Output ONLY valid JSON. +- The "index" field MUST match the question number shown in the input (e.g., if the question is numbered "8.", then "index": 8). + +**Output JSON Schema:** + +```json +{ + "data": [ + { + "index": 1, + "subject": "", + "chapter_index": , + "chapter_title": "", + "original_question_text": "", + "confidence": <0.0 to 1.0> + } + ], + "success": [true] +} +``` + +Now classify the following question(s): +``` +8. first-order reaction has half-life 200 s. Time required for the amount of reactant to become one- eighth of its initial value is: (A) 200 s (B) 400 s (C) 600 s (D) 800 s +9. Which hormone helps in internode/petiole elongation in deep water rice plants to keep leaves/ upper parts of the shoot above water? (A) Gibberellins (B) Zeatin (C) ABA (D) Ethylene +10. 
Given below are two statements: The interphase nucleus has highly extended and elaborate nucleoprotein fibres called chromatin which contains DNA and some basic proteins called histones, some non-histone proteins and also RNA. Statement I: Statement II: A haploid set of chromosomes in humans contains 3.3 X 109 bp which is approximately two metre long thread of DNA distributed among its twenty three chromosomes. In the light of the above statements, choose the most appropriate answer from the options given below: (A) Statement is correct but Statement Il is incorrect. (B) Statement is incorrect but Statement ll is correct. (C) Both Statement and Statement ll are correct. (D) Both Statement and Statement ll are incorrect. +11. A force F = - (yi + +x)) N acts on a particle moving in the xy plane. Starting from the origin, the particle is taken along the positive x-axis to the point (a, 0) m and then parallel to the y-axis to the point (a, a) m. The total work done (in joules) by the force is: (A) ka² (B) ka² (C) -2ka² (D) Zero +12. On electrolysis of dilute nitric acid using platinum electrodes, the product obtained at the anode is: (A) H2 gas (B) O2 gas (C) NO2 gas (D) N2 gas +13. 
Identify the incorrectly matched pair: (A) Petiole is modified for photosynthesis Australian acacia (B) Leaves modified into spines Cactus (C) Stem modified into a fleshy, cylindrical photosynthetic structure Opuntia (D) Stem modified into thorns Citrus +``` diff --git a/gemini_classifier.py b/gemini_classifier.py new file mode 100644 index 0000000000000000000000000000000000000000..314fc172c6c583738670b6f893d2c672a4d72373 --- /dev/null +++ b/gemini_classifier.py @@ -0,0 +1,278 @@ +import os +import json +import requests +import sys +from typing import List, Optional, Dict, Any +from api_key_manager import get_api_key_manager + +def classify_questions_with_gemini(questions: List[str], start_index: int = 0) -> Optional[Dict[Any, Any]]: + """ + Classifies a single batch of questions using the Gemini API. + `questions` should be a list of strings representing one batch. + `start_index` is the overall starting index for this batch (e.g., 0, 7, 14...). + """ + # Get API key from the manager + manager = get_api_key_manager() + api_key, key_index = manager.get_key('gemini') + + if not api_key: + raise ValueError("No available Gemini API keys. Please set GEMINI_API_KEY or GOOGLE_API_KEY environment variable.") + + # Construct the input text with the current batch of questions + # The model expects 1-based indexing in the prompt. + input_text = "\n".join([f"{j + start_index + 1}. {q}" for j, q in enumerate(questions)]) + + prompt = f""" +**System Role:** You are a question classifier for NEET/JEE exams, specialized in mapping questions to their corresponding subjects and chapters from the NCERT syllabus. + +Your task is to analyze each question, first classify it into the most relevant subject, and then identify the most relevant chapter(s) from the official syllabus structures provided below. 
+ +**Available Subjects (Use these exact titles):** +- Biology +- Chemistry +- Physics +- Mathematics + +**Syllabus Chapters (Use these exact titles for the respective subjects):** + +--- +**1. BIOLOGY (Common for NEET & JEE)** + +**Class XI** +1. The Living World +2. Biological Classification +3. Plant Kingdom +4. Animal Kingdom +5. Morphology of Flowering Plants +6. Anatomy of Flowering Plants +7. Structural Organisation in Animals +8. Cell: The Unit of Life +9. Biomolecules +10. Cell Cycle and Cell Division +11. Photosynthesis in Higher Plants +12. Respiration in Plants +13. Plant Growth and Development +14. Breathing and Exchange of Gases +15. Body Fluids and Circulation +16. Excretory Products and their Elimination +17. Locomotion and Movement +18. Neural Control and Coordination +19. Chemical Coordination and Integration +20. Sexual Reproduction in Flowering Plants +21. Human Reproduction +22. Reproductive Health +23. Principles of Inheritance and Variation +24. Molecular Basis of Inheritance +25. Evolution +26. Health and Disease +27. Improvement in Food Production +28. Microbes in Human Welfare +29. Biotechnology - Principles and Processes +30. Biotechnology and Its Applications +31. Organisms and Populations +32. Ecosystem +33. Biodiversity and Its Conservation + +--- +**2. CHEMISTRY (Common for NEET & JEE)** + +**Class XI** +1. Some Basic Concepts of Chemistry +2. Structure of Atom +3. Classification of Elements and Periodicity in Properties +4. Chemical Bonding and Molecular Structure +5. States of Matter: Gases and Liquids +6. Thermodynamics +7. Equilibrium +8. Redox Reactions +9. Hydrogen +10. The s-Block Elements +11. The p-Block Elements (Group 13 and 14) +12. Organic Chemistry – Some Basic Principles and Techniques (GOC) +13. Hydrocarbons +14. Environmental Chemistry + +**Class XII** +1. The Solid State +2. Solutions +3. Electrochemistry +4. Chemical Kinetics +5. Surface Chemistry +6. 
General Principles and Processes of Isolation of Elements (Metallurgy) +7. The p-Block Elements (Group 15 to 18) +8. The d- and f- Block Elements +9. Coordination Compounds +10. Haloalkanes and Haloarenes +11. Alcohols, Phenols and Ethers +12. Aldehydes, Ketones and Carboxylic Acids +13. Amines +14. Biomolecules +15. Polymers +16. Chemistry in Everyday Life + +--- +**3. PHYSICS (Common for NEET & JEE)** + +**Class XI** +1. Units and Measurements +2. Motion in a Straight Line +3. Motion in a Plane +4. Laws of Motion +5. Work, Energy and Power +6. System of Particles and Rotational Motion +7. Gravitation +8. Mechanical Properties of Solids +9. Mechanical Properties of Fluids +10. Thermal Properties of Matter +11. Thermodynamics +12. Kinetic Theory +13. Oscillations +14. Waves + +**Class XII** +1. Electric Charges and Fields +2. Electrostatic Potential and Capacitance +3. Current Electricity +4. Moving Charges and Magnetism +5. Magnetism and Matter +6. Electromagnetic Induction +7. Alternating Current +8. Electromagnetic Waves +9. Ray Optics and Optical Instruments +10. Wave Optics +11. Dual Nature of Radiation and Matter +12. Atoms +13. Nuclei +14. Semiconductor Electronics: Materials, Devices and Simple Circuits +15. Communication Systems + +--- +**4. MATHEMATICS (For JEE Only)** + +**Class XI** +1. Sets +2. Relations and Functions +3. Trigonometric Functions +4. Principle of Mathematical Induction +5. Complex Numbers and Quadratic Equations +6. Linear Inequalities +7. Permutations and Combinations +8. Binomial Theorem +9. Sequences and Series +10. Straight Lines +11. Conic Sections +12. Introduction to Three Dimensional Geometry +13. Limits and Derivatives +14. Mathematical Reasoning +15. Statistics +16. Probability + +**Class XII** +1. Relations and Functions +2. Inverse Trigonometric Functions +3. Matrices +4. Determinants +5. Continuity and Differentiability +6. Application of Derivatives +7. Integrals +8. Application of Integrals +9. Differential Equations +10. 
Vector Algebra +11. Three Dimensional Geometry +12. Linear Programming +13. Probability + +--- + +**Classification Guidelines:** + +1. **Primary Classification**: Identify the single most relevant subject, and then the most relevant chapter(s) within that subject, that directly addresses the question's core concept. +2. **Multi-Chapter Questions**: If a question explicitly spans 2-3 distinct chapters, include all relevant chapters. +3. **Confidence Scoring** (0.0 to 1.0): + * **1.0**: Perfect match + * **0.8-0.9**: Strong match + * **0.5-0.7**: Moderate match + * **Below 0.5**: Avoid unless unavoidable. +4. **Non-Syllabus Questions**: If a question is not from any of the provided subjects/chapters, set `subject` to 'Unclassified' and `chapter_title` to 'Unclassified'. + +**Critical Requirements:** + +- Use ONLY the subject titles exactly as listed above, or 'Unclassified'. +- Use ONLY the chapter titles exactly as listed above, or 'Unclassified'. +- Preserve the original question text completely. +- Output ONLY valid JSON. +- The "index" field MUST match the question number shown in the input (e.g., if the question is numbered "8.", then "index": 8). + +**Output JSON Schema:** + +```json +{{ + "data": [ + {{ + "index": 1, + "subject": "", + "chapter_index": , + "chapter_title": "", + "original_question_text": "", + "confidence": <0.0 to 1.0> + }} + ], + "success": [true] +}} +``` + +Now classify the following question(s): +``` +{input_text} +``` +""" + with open('gemini_classification_prompt.txt', 'w') as f: + f.write(prompt) + + url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key={api_key}" + headers = {'Content-Type': 'application/json'} + + request_body = { + "contents": [{"role": "user", "parts": [{"text": prompt}]}], + "generationConfig": { + "responseMimeType": "application/json", + } + } + + print(f"Sending batch to Gemini API with {len(questions)} questions.") + print(f"Sending request to Gemini API. 
def generate_subjective_questions(image_path: str) -> Optional[Dict[Any, Any]]:
    """Transcribe and structure subjective questions from an image via Gemini.

    Args:
        image_path: path of the image file to send.

    Returns:
        The parsed JSON object returned by the model (``success`` and ``data``
        per the response schema below), or ``None`` when the API key is
        missing, the image cannot be read, the request fails, or the response
        cannot be parsed. Every failure is reported on stderr.
    """
    # Local import: mimetypes is only needed by this function.
    import mimetypes

    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        print("Error: GEMINI_API_KEY environment variable is not set.", file=sys.stderr)
        return None

    # Read and encode image
    try:
        with open(image_path, "rb") as image_file:
            encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
    except (OSError, TypeError, ValueError) as e:
        print(f"Error reading image file: {e}", file=sys.stderr)
        return None

    # Declare the real image type when the extension reveals it; keep the
    # previous JPEG default for unknown or non-image extensions.
    mime_type, _ = mimetypes.guess_type(image_path)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    model_id = "gemini-flash-latest"
    url = f"https://generativelanguage.googleapis.com/v1beta/models/{model_id}:generateContent?key={api_key}"
    headers = {'Content-Type': 'application/json'}

    prompt_text = """
    Analyze the provided image. It contains a list of subjective questions (handwritten or printed).

    Task:
    1. **Transcribe** each question exactly as written.
    2. **Identify the Topic:** Determine the subject or topic for each question (e.g., "Ascomycetes", "Thermodynamics"). If the header specifies a topic, use that.
    3. **Structure:** Return the data in the specified JSON format.
    4. **Numbering:** Use the question number found in the image.

    If the image contains multiple questions, extract all of them.
    """

    request_body = {
        "contents": [
            {
                "role": "user",
                "parts": [
                    {
                        "inline_data": {
                            "mime_type": mime_type,
                            "data": encoded_string
                        }
                    },
                    {
                        "text": prompt_text
                    }
                ]
            }
        ],
        "generationConfig": {
            "responseMimeType": "application/json",
            "responseSchema": {
                "type": "object",
                "properties": {
                    "success": {"type": "boolean"},
                    "data": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "question_topic": {"type": "string"},
                                "question_html": {"type": "string"},
                                "question_number_within_topic": {"type": "string"}
                            },
                            "required": ["question_topic", "question_html", "question_number_within_topic"]
                        }
                    }
                },
                "required": ["success", "data"]
            }
        }
    }

    text = None
    try:
        response = requests.post(url, headers=headers, json=request_body, timeout=120)
        response.raise_for_status()

        response_json = response.json()

        # Extract text from the first candidate; an empty or missing candidate
        # list is treated the same as "no parts".
        candidates = response_json.get('candidates') or [{}]
        parts = candidates[0].get('content', {}).get('parts', [])

        if not parts:
            print("Error: Gemini generated no content.", file=sys.stderr)
            return None

        text = parts[0]['text']
        return json.loads(text)

    except requests.exceptions.RequestException as e:
        print(f"Error during Gemini API call: {e}", file=sys.stderr)
        # A Response is falsy for 4xx/5xx statuses, so test against None to
        # make sure the error body is actually printed.
        if e.response is not None:
            print(f"Response: {e.response.text}", file=sys.stderr)
        return None
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON response: {e}", file=sys.stderr)
        print(f"Raw text: {text}", file=sys.stderr)
        return None
    except (KeyError, IndexError, TypeError, AttributeError) as e:
        # The response was valid JSON but not shaped as expected.
        print(f"Error: unexpected Gemini response structure: {e!r}", file=sys.stderr)
        return None


if __name__ == "__main__":
    # Test the function
    result = generate_subjective_questions("Ascomycetes")
    if result:
        print(json.dumps(result, indent=2))
0000000000000000000000000000000000000000..4f3ebfd7ec177dee58cb5a25a2aae7c780f41e63 --- /dev/null +++ b/gemma_classifier.py @@ -0,0 +1,297 @@ +import os +import json +import requests +import sys +from typing import List, Dict, Any, Optional +from api_key_manager import get_api_key_manager + +class GemmaClassifier: + def __init__(self): + # API key will be fetched dynamically via get_api_key_manager + pass + + def classify(self, questions: List[str], start_index: int = 0) -> Optional[Dict[str, Any]]: + """ + Classifies a list of questions using the NVIDIA NIM Gemma API. + `questions` should be a list of strings representing the questions to classify. + `start_index` is the overall starting index for this batch (e.g., 0, 7, 14...). + """ + manager = get_api_key_manager() + api_key, key_index = manager.get_key('nvidia') + + if not api_key: + raise ValueError("No available NVIDIA API keys. Please set NVIDIA_API_KEY environment variable.") + + full_prompt = self._generate_gemma_prompt(questions=questions, start_index=start_index) + + url = "https://integrate.api.nvidia.com/v1/chat/completions" + headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json" + } + + payload = { + "model": "google/gemma-3n-e4b-it", + "messages": [{"role": "user", "content": full_prompt}], + "temperature": 0.2, + "max_tokens": 2048, + "stream": False + } + + print(f"Sending batch to NVIDIA NIM Gemma API with {len(questions)} questions.") + + try: + response = requests.post(url, headers=headers, json=payload, timeout=300) + response.raise_for_status() + + response_json = response.json() + + if 'choices' in response_json and len(response_json['choices']) > 0: + first_choice = response_json['choices'][0] + if 'message' in first_choice and 'content' in first_choice['message']: + model_output_content = first_choice['message']['content'] + + if model_output_content.startswith("```json") and model_output_content.endswith("```"): + model_output_content = 
model_output_content[7:-3].strip() + + try: + batch_result = json.loads(model_output_content) + manager.mark_success('nvidia', key_index) + return batch_result + except json.JSONDecodeError as e: + print(f"Error decoding JSON from model output: {e}", file=sys.stderr) + print(f"Model output content: {model_output_content}", file=sys.stderr) + manager.mark_failure('nvidia', key_index) + return None + else: + print("Error: 'message' or 'content' not found in NVIDIA NIM Gemma response choice.", file=sys.stderr) + manager.mark_failure('nvidia', key_index) + return None + else: + print("Error: 'choices' not found or empty in NVIDIA NIM Gemma response.", file=sys.stderr) + manager.mark_failure('nvidia', key_index) + return None + + except requests.exceptions.RequestException as e: + print(f"Error during NVIDIA NIM Gemma API call: {repr(e)}", file=sys.stderr) + if e.response is not None: + print(f"Response status code: {e.response.status_code}", file=sys.stderr) + print(f"Response body: {e.response.text}", file=sys.stderr) + manager.mark_failure('nvidia', key_index) + return None + except Exception as e: + print(f"An unexpected error occurred: {e}", file=sys.stderr) + manager.mark_failure('nvidia', key_index) + return None + + def _generate_gemma_prompt(self, questions: List[str], start_index: int) -> str: + """ + Generates the detailed prompt for the Gemma classifier, similar to gemini_classifier.py. + """ + input_text = "\n".join([f"{j + start_index + 1}. {q}" for j, q in enumerate(questions)]) + + prompt = f""" +**System Role:** You are a question classifier for NEET/JEE exams, specialized in mapping questions to their corresponding subjects and chapters from the NCERT syllabus. + +Your task is to analyze each question, first classify it into the most relevant subject, and then identify the most relevant chapter(s) from the official syllabus structures provided below. 
+ +**Available Subjects (Use these exact titles):** +- Biology +- Chemistry +- Physics +- Mathematics + +**Syllabus Chapters (Use these exact titles for the respective subjects):** + +--- +**1. BIOLOGY (Common for NEET & JEE)** + +**Class XI** +1. The Living World +2. Biological Classification +3. Plant Kingdom +4. Animal Kingdom +5. Morphology of Flowering Plants +6. Anatomy of Flowering Plants +7. Structural Organisation in Animals +8. Cell: The Unit of Life +9. Biomolecules +10. Cell Cycle and Cell Division +11. Photosynthesis in Higher Plants +12. Respiration in Plants +13. Plant Growth and Development +14. Breathing and Exchange of Gases +15. Body Fluids and Circulation +16. Excretory Products and their Elimination +17. Locomotion and Movement +18. Neural Control and Coordination +19. Chemical Coordination and Integration +20. Sexual Reproduction in Flowering Plants +21. Human Reproduction +22. Reproductive Health +23. Principles of Inheritance and Variation +24. Molecular Basis of Inheritance +25. Evolution +26. Health and Disease +27. Improvement in Food Production +28. Microbes in Human Welfare +29. Biotechnology - Principles and Processes +30. Biotechnology and Its Applications +31. Organisms and Populations +32. Ecosystem +33. Biodiversity and Its Conservation + +--- +**2. CHEMISTRY (Common for NEET & JEE)** + +**Class XI** +1. Some Basic Concepts of Chemistry +2. Structure of Atom +3. Classification of Elements and Periodicity in Properties +4. Chemical Bonding and Molecular Structure +5. States of Matter: Gases and Liquids +6. Thermodynamics +7. Equilibrium +8. Redox Reactions +9. Hydrogen +10. The s-Block Elements +11. The p-Block Elements (Group 13 and 14) +12. Organic Chemistry – Some Basic Principles and Techniques (GOC) +13. Hydrocarbons +14. Environmental Chemistry + +**Class XII** +1. The Solid State +2. Solutions +3. Electrochemistry +4. Chemical Kinetics +5. Surface Chemistry +6. 
General Principles and Processes of Isolation of Elements (Metallurgy) +7. The p-Block Elements (Group 15 to 18) +8. The d- and f- Block Elements +9. Coordination Compounds +10. Haloalkanes and Haloarenes +11. Alcohols, Phenols and Ethers +12. Aldehydes, Ketones and Carboxylic Acids +13. Amines +14. Biomolecules +15. Polymers +16. Chemistry in Everyday Life + +--- +**3. PHYSICS (Common for NEET & JEE)** + +**Class XI** +1. Units and Measurements +2. Motion in a Straight Line +3. Motion in a Plane +4. Laws of Motion +5. Work, Energy and Power +6. System of Particles and Rotational Motion +7. Gravitation +8. Mechanical Properties of Solids +9. Mechanical Properties of Fluids +10. Thermal Properties of Matter +11. Thermodynamics +12. Kinetic Theory +13. Oscillations +14. Waves + +**Class XII** +1. Electric Charges and Fields +2. Electrostatic Potential and Capacitance +3. Current Electricity +4. Moving Charges and Magnetism +5. Magnetism and Matter +6. Electromagnetic Induction +7. Alternating Current +8. Electromagnetic Waves +9. Ray Optics and Optical Instruments +10. Wave Optics +11. Dual Nature of Radiation and Matter +12. Atoms +13. Nuclei +14. Semiconductor Electronics: Materials, Devices and Simple Circuits +15. Communication Systems + +--- +**4. MATHEMATICS (For JEE Only)** + +**Class XI** +1. Sets +2. Relations and Functions +3. Trigonometric Functions +4. Principle of Mathematical Induction +5. Complex Numbers and Quadratic Equations +6. Linear Inequalities +7. Permutations and Combinations +8. Binomial Theorem +9. Sequences and Series +10. Straight Lines +11. Conic Sections +12. Introduction to Three Dimensional Geometry +13. Limits and Derivatives +14. Mathematical Reasoning +15. Statistics +16. Probability + +**Class XII** +1. Relations and Functions +2. Inverse Trigonometric Functions +3. Matrices +4. Determinants +5. Continuity and Differentiability +6. Application of Derivatives +7. Integrals +8. Application of Integrals +9. Differential Equations +10. 
Vector Algebra +11. Three Dimensional Geometry +12. Linear Programming +13. Probability + +--- + +**Classification Guidelines:** + +1. **Primary Classification**: Identify the single most relevant subject, and then the most relevant chapter(s) within that subject, that directly addresses the question's core concept. +2. **Multi-Chapter Questions**: If a question explicitly spans 2-3 distinct chapters, include all relevant chapters. +3. **Confidence Scoring** (0.0 to 1.0): + * **1.0**: Perfect match + * **0.8-0.9**: Strong match + * **0.5-0.7**: Moderate match + * **Below 0.5**: Avoid unless unavoidable. +4. **Non-Syllabus Questions**: If a question is not from any of the provided subjects/chapters, set `subject` to 'Unclassified' and `chapter_title` to 'Unclassified'. + +**Critical Requirements:** + +- Use ONLY the subject titles exactly as listed above, or 'Unclassified'. +- Use ONLY the chapter titles exactly as listed above, or 'Unclassified'. +- Preserve the original question text completely. +- Output ONLY valid JSON. +- The "index" field MUST match the question number shown in the input (e.g., if the question is numbered "8.", then "index": 8). 
def run_command(command):
    """Run an external command and report whether it succeeded.

    Args:
        command: Argument list (program first), passed to ``subprocess.run``
            without a shell.

    Returns:
        True when the process started and exited with status 0, otherwise
        False. Failures are printed, never raised, so callers can treat the
        sync as best-effort.
    """
    print(f"Running: {' '.join(command)}")
    # Ensure HF_TOKEN is in environment for the command
    env = os.environ.copy()
    if HF_TOKEN:
        env["HF_TOKEN"] = HF_TOKEN

    try:
        result = subprocess.run(command, capture_output=True, text=True, env=env)
    except OSError as exc:
        # Raised when the executable is missing or not runnable (e.g. the
        # `hf` CLI is not installed); report it like any other failure
        # instead of aborting with a traceback.
        print(f"Error: could not start {command[0]!r}: {exc}")
        return False

    if result.returncode != 0:
        print(f"Error: {result.stderr}")
        return False

    print(f"Output: {result.stdout}")
    return True
def init_local(base_dir="data_repo"):
    """Ensure the local data directory has the sub-folders the app writes to.

    Safe to call repeatedly, and useful when the download failed or the
    dataset is new.

    Args:
        base_dir: Root of the local data directory. Defaults to
            ``"data_repo"``, the directory used by the download and upload
            commands in this script.
    """
    for folder in ("output", "processed", "uploads"):
        os.makedirs(os.path.join(base_dir, folder), exist_ok=True)
    # database.db will be created by the app if it doesn't exist,
    # but we should ensure it's in data_repo.
    # We'll handle this in entrypoint.sh by symlinking.
#!/bin/bash
#
# This script runs iperf3 in client mode to test network speed.
#
# Usage: iperf3_client.sh <server_ip>
#
# Four tests are run against the server in sequence: standard, reverse,
# parallel-stream, and parallel-stream reverse.
#

# Check if iperf3 is installed
if ! command -v iperf3 &> /dev/null
then
    echo "iperf3 could not be found. Please install it."
    echo "For Debian/Ubuntu, use: sudo apt install iperf3"
    echo "For CentOS/RHEL, use: sudo yum install iperf3"
    echo "For macOS (with Homebrew), use: brew install iperf3"
    echo "For Windows, download from iperf.fr"
    exit 1
fi

# Check for server IP argument
if [ -z "$1" ]
then
    # Name the expected argument so the message is actionable.
    echo "Usage: $0 <server_ip>"
    exit 1
fi

SERVER_IP=$1
PORT=5201
PARALLEL_STREAMS=4

echo ""
echo "------------------------------------------------------------------"
echo " iperf3 Client"
echo "------------------------------------------------------------------"
echo " Server IP: ${SERVER_IP}"
echo " Port: ${PORT}"
echo "------------------------------------------------------------------"

# Expansions are quoted so an argument containing spaces or glob
# characters is passed to iperf3 as a single word.
echo ""
echo "Running standard test (client to server)..."
iperf3 -c "${SERVER_IP}" -p "${PORT}"

echo ""
echo "Running reverse test (server to client)..."
iperf3 -c "${SERVER_IP}" -p "${PORT}" -R

echo ""
echo "Running test with ${PARALLEL_STREAMS} parallel streams (client to server)..."
iperf3 -c "${SERVER_IP}" -p "${PORT}" -P "${PARALLEL_STREAMS}"

echo ""
echo "Running reverse test with ${PARALLEL_STREAMS} parallel streams (server to client)..."
iperf3 -c "${SERVER_IP}" -p "${PORT}" -P "${PARALLEL_STREAMS}" -R

echo ""
echo "------------------------------------------------------------------"
echo " Test complete."
echo "------------------------------------------------------------------"
#!/bin/bash
#
# This script starts an iperf3 server and displays the server's IP address.
#

# Check if iperf3 is installed
if ! command -v iperf3 &> /dev/null
then
    echo "iperf3 could not be found. Please install it."
    echo "For Debian/Ubuntu, use: sudo apt install iperf3"
    echo "For CentOS/RHEL, use: sudo yum install iperf3"
    exit 1
fi

# Get the server's IP address.
# `hostname -I` is not available on every platform (e.g. macOS/BSD), so
# silence its error and fall back to a placeholder rather than printing an
# empty address.
IP_ADDRESS=$(hostname -I 2> /dev/null | awk '{print $1}')
if [ -z "${IP_ADDRESS}" ]
then
    IP_ADDRESS="unknown"
fi
PORT=5201

echo ""
echo "------------------------------------------------------------------"
echo " iperf3 Server"
echo "------------------------------------------------------------------"
echo " Server IP: ${IP_ADDRESS}"
echo " Port: ${PORT}"
echo ""
echo "Starting iperf3 server..."
echo "Press Ctrl+C to stop the server."
echo "------------------------------------------------------------------"

# Start the iperf3 server
iperf3 -s -p "${PORT}"
class JSONProcessor:
    """Normalise an uploaded test-result payload into a flat question list.

    The payload flavour is detected once, at construction time, and stored
    in ``self.version``: an explicit ``"version"`` key wins; otherwise a
    payload carrying a ``root`` object (directly, or nested under ``data``)
    is treated as the "original" format.
    """

    # Placeholder used when an answer index does not resolve to an option.
    _NOT_AVAILABLE = "N/A"

    def __init__(self, json_data):
        self.data = json_data
        self.version = self._detect_version()

    def _detect_version(self):
        """Return the version as a string, ``"original"``, or None if unknown."""
        data = self.data
        if not data:
            return None
        if "version" in data:
            return str(data["version"])
        if "data" in data and "root" in data["data"]:
            return "original"
        if "root" in data:
            return "original"
        return None

    def process(self, statuses=None):
        """Dispatch to the handler for the detected version.

        Args:
            statuses: Optional list of statuses to keep; only honoured by
                the "original" format (v2.1 reads them from its own config).

        Returns:
            A dict with at least ``test_name``, ``questions`` and
            ``font_size``.

        Raises:
            ValueError: If the payload version is unknown or unsupported.
            NotImplementedError: For schema version "2".
        """
        if self.version == "2.1":
            return self._process_v2_1()
        elif self.version == "2":
            return self._process_v2()
        elif self.version == "original":
            return self._process_original(statuses=statuses)
        else:
            raise ValueError(f"Unsupported or unknown JSON version: {self.version}")

    @staticmethod
    def _safe_int(value):
        """Return ``int(value)``, or None when the value cannot be converted."""
        try:
            return int(value)
        except (ValueError, TypeError):
            return None

    @classmethod
    def _option_at(cls, options, index):
        """Return ``options[index]`` for a valid index, else the "N/A" placeholder.

        Negative indices are rejected explicitly: Python would otherwise
        resolve them from the end of the list and report a wrong answer.
        """
        if isinstance(index, int) and 0 <= index < len(options):
            return options[index]
        return cls._NOT_AVAILABLE

    def _process_v2_1(self):
        """Process a schema v2.1 payload, keeping only the configured statuses."""
        config = self.data.get("config", {})
        statuses_to_include = config.get("statuses_to_include", ["wrong", "unattempted"])

        processed_questions = []
        for q in self.data.get("questions", []):
            status = q.get("status")
            if status not in statuses_to_include:
                continue

            if q.get('source') == 'classified':
                # Classified questions carry the answer value itself in the
                # *_index fields, so it is passed through untouched.
                user_answer = q.get('user_answer_index')
                correct_answer = q.get('correct_answer_index')
            else:
                options = q.get("options") or []
                user_answer = self._option_at(
                    options, self._safe_int(q.get("user_answer_index")))
                correct_answer = self._option_at(
                    options, self._safe_int(q.get("correct_answer_index")))

            processed_questions.append({
                "question": q.get("question_text"),
                "yourAnswer": user_answer,
                "correctAnswer": correct_answer,
                "status": status,
                "custom_fields": q.get("custom_fields", {})
            })

        return {
            "test_name": self.data.get("test_name", "Unnamed Test"),
            "questions": processed_questions,
            "font_size": config.get("font_size", 24),
            "metadata": self.data.get("metadata", {}),
            "config": config
        }

    def _process_v2(self):
        """Placeholder for schema v2, which has not been defined yet."""
        raise NotImplementedError("Processing for JSON schema v2 is not yet implemented. Please provide the schema.")

    def _process_original(self, statuses=None):
        """Process the "original" format.

        Each question's status is derived by comparing the user's answer
        index with the correct option index; questions whose id cannot be
        decoded are skipped.
        """
        data_root = self.data
        if 'data' in self.data and 'root' in self.data['data']:
            data_root = self.data['data']

        attempt = data_root.get("root", {}).get("_testAttempt4d9rq8", {})
        edges = attempt.get("test", {}).get("_questions4dxVsH", {}).get("edges", [])
        user_answers = attempt.get("userAnswers", {})

        selected_statuses = statuses if statuses is not None else self.data.get('statuses', ['wrong', 'unattempted'])

        processed_questions = []
        for edge in edges:
            node = edge.get("node", {})
            try:
                # The id is base64 of a "<type>:<id>" pair; the part after
                # the colon is the key used in the user-answers mapping.
                question_id = base64.b64decode(node.get("id", "")).decode('utf-8').split(':')[1]
            except (IndexError, ValueError, TypeError):
                continue

            correct_option_index = node.get("correctOptionIndex")
            # A missing or malformed answer is treated as "unattempted"
            # instead of aborting the whole payload.
            user_answer_index = self._safe_int(user_answers.get(question_id))

            if user_answer_index is None:
                status = "unattempted"
            elif user_answer_index == correct_option_index:
                status = "correct"
            else:
                status = "wrong"

            if status not in selected_statuses:
                continue

            options = node.get("options") or []
            processed_questions.append({
                "question": fix_font_family_in_html(node.get("question", "")),
                "yourAnswer": self._option_at(options, user_answer_index),
                "correctAnswer": self._option_at(options, correct_option_index),
                "status": status
            })

        test_name = self.data.get('test_name')
        if not test_name:
            try:
                test_name = data_root['root']['_testAttempt4d9rq8']['test']['name']
            except (KeyError, TypeError):
                test_name = 'Uploaded Test'

        return {
            "test_name": test_name,
            "questions": processed_questions,
            "font_size": self.data.get('font_size', 24)
        }

Question text not provided.

def _process_json_and_generate_pdf(raw_data, user_id):
    """
    Helper function to process JSON data, generate images, and create a PDF.
    This is called by both the /json_upload route and directly from other modules.

    Args:
        raw_data: Parsed JSON payload (a dict accepted by ``JSONProcessor``).
        user_id: Id of the user who owns the created session and PDF.

    Returns:
        A ``(body, status_code)`` tuple. ``body`` contains either ``error``
        or ``success`` plus ``view_url`` (when the payload has
        ``"view": true``) or ``edit_url``. The status is 400 for an empty
        payload or an unsupported schema version, and 500 for any other
        failure.
    """
    from utils import create_a4_pdf_from_images

    # Reject an empty payload before touching the database.
    if not raw_data:
        return {'error': 'No JSON payload received.'}, 400

    conn = None
    try:
        try:
            processed_data = JSONProcessor(raw_data).process()
        except (ValueError, NotImplementedError) as e:
            # An unknown or not-yet-supported schema version is a problem
            # with the request, not a server fault.
            current_app.logger.error(f"Error in _process_json_and_generate_pdf: {repr(e)}")
            return {'error': str(e)}, 400

        test_name = processed_data.get("test_name")
        processed_questions = processed_data.get("questions")
        font_size = processed_data.get("font_size")
        metadata = processed_data.get("metadata", {})
        tags = metadata.get("tags", "programmatic")
        layout = processed_data.get("config", {}).get("layout", {})

        images_per_page = int(layout.get('images_per_page', 4))
        orientation = layout.get('orientation', 'portrait')
        grid_rows = int(layout.get('grid_rows')) if layout.get('grid_rows') else None
        grid_cols = int(layout.get('grid_cols')) if layout.get('grid_cols') else None
        practice_mode = layout.get('practice_mode', 'none')

        # Resolved here, in the request thread, because the worker threads
        # below run without a Flask application context.
        processed_folder = current_app.config['PROCESSED_FOLDER']

        conn = get_db_connection()
        session_id = str(uuid.uuid4())
        conn.execute('INSERT INTO sessions (id, original_filename, user_id) VALUES (?, ?, ?)', (session_id, f"{test_name}.json", user_id))

        original_filename = f"{session_id}_dummy_original.png"
        conn.execute(
            'INSERT INTO images (session_id, image_index, filename, original_name, image_type) VALUES (?, ?, ?, ?, ?)',
            (session_id, 0, original_filename, 'JSON Upload', 'original')
        )

        # Render every question to an image in parallel; result() re-raises
        # the first worker failure so the session is rolled back below.
        with ThreadPoolExecutor(max_workers=10) as executor:
            futures = [
                executor.submit(html_to_image_worker, item, session_id, font_size,
                                processed_folder, original_filename, i)
                for i, item in enumerate(processed_questions)
            ]
            for future in futures:
                future.result()

        for i, item in enumerate(processed_questions):
            processed_filename = f"processed_{session_id}_page0_crop{i}.jpg"
            image_insert_result = conn.execute(
                'INSERT INTO images (session_id, image_index, filename, original_name, processed_filename, image_type) VALUES (?, ?, ?, ?, ?, ?)',
                (session_id, i + 1, original_filename, f"Question {i+1}", processed_filename, 'cropped')
            )
            image_id = image_insert_result.lastrowid
            conn.execute(
                'INSERT INTO questions (session_id, image_id, question_number, status, marked_solution, actual_solution) VALUES (?, ?, ?, ?, ?, ?)',
                (session_id, image_id, str(i + 1), item.get('status'), item.get('yourAnswer'), item.get('correctAnswer'))
            )

        conn.commit()

        if raw_data.get('view') is True:
            query = "SELECT q.*, i.processed_filename FROM questions q JOIN images i ON q.image_id = i.id WHERE q.session_id = ? ORDER BY i.id"
            all_questions = [dict(row) for row in conn.execute(query, (session_id,)).fetchall()]
            if not all_questions:
                return {'error': 'No questions were processed to generate a PDF.'}, 400

            from werkzeug.utils import secure_filename
            pdf_filename = f"{secure_filename(test_name)}_{session_id[:8]}.pdf"

            create_a4_pdf_from_images(
                image_info=all_questions, base_folder=processed_folder, output_filename=pdf_filename,
                images_per_page=images_per_page, output_folder=current_app.config['OUTPUT_FOLDER'],
                orientation=orientation, grid_rows=grid_rows, grid_cols=grid_cols, practice_mode=practice_mode
            )
            conn.execute(
                'INSERT INTO generated_pdfs (session_id, filename, subject, tags, notes, source_filename, user_id) VALUES (?, ?, ?, ?, ?, ?, ?)',
                (session_id, pdf_filename, test_name, tags, 'Generated automatically via JSON upload.', f"{test_name}.json", user_id)
            )
            conn.commit()
            return {'success': True, 'view_url': url_for('main.view_pdf', filename=pdf_filename, _external=True)}, 200
        else:
            return {'success': True, 'edit_url': url_for('main.question_entry_v2', session_id=session_id, test_name=test_name, _external=True)}, 200

    except Exception as e:
        if conn:
            conn.rollback()
        current_app.logger.error(f"Error in _process_json_and_generate_pdf: {repr(e)}")
        return {'error': str(e)}, 500
    finally:
        if conn:
            conn.close()
def draw_multiline_text(draw, text, position, font, max_width, fill):
    """Draw ``text`` word-wrapped to ``max_width`` and return the next free y.

    Args:
        draw: Drawing object exposing ``text((x, y), line, fill=, font=)``.
        text: Text to render; explicit ``\\n`` line breaks are preserved.
        position: ``(x, y)`` of the top-left corner of the first line.
        font: Font exposing ``getlength`` and either ``getbbox`` or
            ``getsize``.
        max_width: Maximum line width, in the units ``font.getlength`` returns.
        fill: Fill colour passed through to ``draw.text``.

    Returns:
        The y coordinate just below the last drawn line.
    """
    x, y = position
    wrapped_lines = []
    for line in text.split('\n'):
        if font.getlength(line) <= max_width:
            wrapped_lines.append(line)
            continue

        current_line = ''
        for word in line.split(' '):
            candidate = current_line + word + ' '
            if font.getlength(candidate) <= max_width:
                current_line = candidate
            else:
                # Flush only a non-empty line: when the very first word is
                # already wider than max_width there is nothing to flush,
                # and emitting '' would insert a spurious blank line.
                if current_line:
                    wrapped_lines.append(current_line)
                current_line = word + ' '
        wrapped_lines.append(current_line)

    if hasattr(font, 'getbbox'):
        bbox = font.getbbox('A')
        line_height = bbox[3] - bbox[1]
    else:
        # Fallback for fonts that do not provide getbbox.
        line_height = font.getsize('A')[1]

    for line in wrapped_lines:
        draw.text((x, y), line, fill=fill, font=font)
        y += line_height + 5  # 5 px of spacing between lines
    return y
+@json_bp.route('/save_processed_json', methods=['POST']) +@login_required +def save_processed_json(): + from app import get_db_connection + questions_data = request.form.get('questions_data') + test_name = request.form.get('test_name') + font_size = int(request.form.get('font_size', 24)) + + try: + questions = json.loads(questions_data) + except json.JSONDecodeError as e: + try: + fixed_data = questions_data.replace('"', "'") + fixed_data = re.sub(r'font-family:"([^"]+)"', lambda m: f"font-family:'{m.group(1).replace('"', "'")}'", fixed_data) + questions = json.loads(fixed_data) + except Exception as inner_e: + current_app.logger.error(f"Initial JSONDecodeError: {e}") + current_app.logger.error(f"Could not fix JSON data. Error: {inner_e}") + current_app.logger.error(f"Problematic JSON data (raw): {repr(questions_data)}") + return jsonify({'error': 'Invalid JSON data received.'}), 400 + + session_id = str(uuid.uuid4()) + conn = get_db_connection() + + try: + conn.execute('INSERT INTO sessions (id, original_filename, user_id) VALUES (?, ?, ?)', (session_id, 'JSON Upload', current_user.id)) + + original_filename = f"{session_id}_dummy_original.png" + conn.execute( + 'INSERT INTO images (session_id, image_index, filename, original_name, image_type) VALUES (?, ?, ?, ?, ?)', + (session_id, 0, original_filename, 'JSON Upload', 'original') + ) + + for i, item in enumerate(questions): + question_html = item.get('question') + your_answer = item.get('yourAnswer') + correct_answer = item.get('correctAnswer') + + if not question_html: + question_html = "

Question text was not provided.

" + + soup = BeautifulSoup(question_html, 'html.parser') + for img in soup.find_all('img'): + img_src = img.get('src') + if img_src and img_src.startswith('http'): + try: + response = requests.get(img_src) + if response.status_code == 200: + img_b64 = base64.b64encode(response.content).decode('utf-8') + img['src'] = f"data:image/png;base64,{img_b64}" + except Exception as e: + current_app.logger.error(f"Could not embed image {img_src}: {e}") + + question_html = str(soup) + + style = f"" + question_html = style + question_html + + processed_filename = f"processed_{session_id}_page0_crop{i}.jpg" + image_path = os.path.join(current_app.config['PROCESSED_FOLDER'], processed_filename) + + try: + imgkit.from_string(question_html, image_path) + except Exception as e: + image_font = get_or_download_font(font_size=font_size) + soup = BeautifulSoup(question_html, 'html.parser') + question_text = soup.get_text() + image = Image.new('RGB', (800, 600), 'white') + draw = ImageDraw.Draw(image) + final_y = draw_multiline_text(draw, question_text, (20, 20), image_font, 760, 'black') + image = image.crop((0, 0, 800, final_y + 20)) + image.save(image_path, 'JPEG') + + image_insert_result = conn.execute( + 'INSERT INTO images (session_id, image_index, filename, original_name, processed_filename, image_type) VALUES (?, ?, ?, ?, ?, ?)', + (session_id, i + 1, original_filename, f"Question {i+1}", processed_filename, 'cropped') + ) + image_id = image_insert_result.lastrowid + + status = item.get('status') + conn.execute( + 'INSERT INTO questions (session_id, image_id, question_number, status, marked_solution, actual_solution) VALUES (?, ?, ?, ?, ?, ?)', + (session_id, image_id, str(i + 1), status, your_answer, correct_answer) + ) + + conn.commit() + return redirect(url_for('main.question_entry_v2', session_id=session_id, test_name=test_name)) + except Exception as e: + conn.rollback() + current_app.logger.error(f"Error in save_processed_json: {repr(e)}") + return jsonify({'error': 
@json_bp.route('/json_upload_v3', methods=['POST'])
def json_upload_v3():
    """Accept a JSON v3.0 payload and hand it to ``JSONProcessorV3``.

    Returns:
        The processor's result as JSON with status 200; a JSON error with
        status 400 when the body is missing, is not valid JSON, or fails
        processing with ``ValueError``; a generic JSON error with status 500
        for anything else.
    """
    # get_json(silent=True) returns None for a missing/invalid body or a
    # non-JSON content type, so the client always receives this endpoint's
    # own JSON error instead of a framework-generated error page.
    payload = request.get_json(silent=True)
    if not payload:
        return jsonify({'error': 'No JSON payload received.'}), 400

    processor_v3 = JSONProcessorV3(payload)
    try:
        # Pass a user_id, for now a default. In a real app, this might come from an API key.
        # SECURITY NOTE(review): this route has no authentication decorator
        # and attributes every upload to the hard-coded user id 45 — confirm
        # this is intentional before exposing the endpoint.
        result = processor_v3.process(user_id=45)
        return jsonify(result), 200
    except ValueError as e:
        current_app.logger.error(f"JSON v3.0 processing error: {e}")
        return jsonify({'error': str(e)}), 400
    except Exception as e:
        current_app.logger.error(f"Unhandled error during JSON v3.0 processing: {e}")
        return jsonify({'error': 'An internal server error occurred.'}), 500
"images_per_page": {"type": "integer"}, + "orientation": {"type": "string"} + }, + "required": ["images_per_page", "orientation"] + } + }, + "required": ["statuses_to_include", "layout"] + }, + "questions": { + "type": "array", + "items": { + "type": "object", + "properties": { + "question_number": {"type": "string"}, + "image_url": {"type": "string", "format": "uri"}, + "status": {"type": "string"}, + "marked_solution": {"type": "string"}, + "correct_solution": {"type": "string"}, + "subject": {"type": "string"}, + "chapter": {"type": "string"}, + "topic": {"type": "string"}, + "time_taken": {"type": "integer"} + }, + "required": ["question_number", "image_url", "status", "marked_solution", "correct_solution", "subject", "time_taken"] + } + }, + "view": {"type": "boolean"} + }, + "required": ["version", "source", "test_name", "test_id", "test_mapping_id", "config", "questions", "view"] +} + +class JSONProcessorV3: + def __init__(self, data=None): + self.data = data + + def validate(self): + """Validates the JSON data against the v3.0 schema.""" + try: + validate(instance=self.data, schema=JSON_V3_SCHEMA) + return True + except ValidationError as e: + raise ValueError(f"Schema validation failed: {e.message}") + + def download_image_from_url(self, url, save_path, timeout=30): + """Downloads an image from a URL and saves it to a path.""" + try: + response = requests.get(url, timeout=timeout) + response.raise_for_status() + with open(save_path, 'wb') as f: + f.write(response.content) + return save_path + except requests.exceptions.RequestException as e: + print(f"Error downloading image from {url}: {e}") # Keep print for tests + if current_app: + current_app.logger.error(f"Error downloading image from {url}: {e}") + return None + + def download_images_parallel(self, questions, output_dir, session_id, max_workers=10): + """Downloads all images in parallel and returns a map of question number to local path.""" + image_paths = {} + with 
def process(self, user_id=1):  # Default user_id for now, replace with actual user
    """Persist a JSON v3.0 payload and, in view mode, render its PDF report.

    Validates ``self.data``, creates one ``sessions`` row, downloads the
    question images in parallel, then inserts one ``images`` row and one
    ``questions`` row per payload question. When the payload's ``view`` flag
    is truthy a PDF is generated and recorded in ``generated_pdfs``;
    otherwise the response points at the manual question-entry page.

    Args:
        user_id: Owner recorded on the created session and generated PDF.

    Returns:
        dict with ``status`` and ``message`` plus either ``view_url``
        (view mode) or ``edit_url``.

    Raises:
        ValueError: If no data is set, validation fails, no questions are
            available for the PDF, or a database/unexpected error occurs.
            Wrapped errors keep the original exception as ``__cause__``.
    """
    if not self.data:
        raise ValueError("No data provided to process.")

    current_app.logger.info("Starting processing of JSON v3.0 payload.")
    current_app.logger.info(f"Test Name: {self.data.get('test_name')}")
    current_app.logger.info(f"Test ID: {self.data.get('test_id')}")
    current_app.logger.info(f"Metadata: {self.data.get('metadata')}")

    if not self.validate():
        raise ValueError("Schema validation failed.")

    conn = get_db_connection()
    try:
        test_name = self.data['test_name']
        test_id = self.data['test_id']
        test_mapping_id = self.data['test_mapping_id']
        questions_payload = self.data['questions']
        view_mode = self.data.get('view', False)
        metadata = json.dumps(self.data.get('metadata', {}))  # Store metadata as JSON string

        config = self.data.get('config', {})
        layout = config.get('layout', {})
        images_per_page = layout.get('images_per_page', 4)
        orientation = layout.get('orientation', 'portrait')

        session_id = str(uuid.uuid4())
        original_filename = f"{test_name}.json"  # Name of the JSON file that was uploaded

        conn.execute(
            'INSERT INTO sessions (id, original_filename, user_id, test_id, test_mapping_id, source, metadata) VALUES (?, ?, ?, ?, ?, ?, ?)',
            (session_id, original_filename, user_id, test_id, test_mapping_id, self.data.get('source', 'manual'), metadata)
        )

        processed_folder = current_app.config.get('PROCESSED_FOLDER', 'processed')
        os.makedirs(processed_folder, exist_ok=True)

        current_app.logger.info(f"Downloading images for test {test_id} to {processed_folder}")
        image_path_map = self.download_images_parallel(questions_payload, processed_folder, session_id)

        question_records = []

        for i, q_data in enumerate(questions_payload):
            question_number = q_data['question_number']

            # A question whose image failed to download keeps a NULL processed_filename.
            processed_filename = None
            local_image_path = image_path_map.get(question_number)
            if local_image_path:
                processed_filename = os.path.basename(local_image_path)

            # The images row is inserted one at a time because its rowid is
            # needed as the foreign key of the matching questions row.
            image_insert_result = conn.execute(
                'INSERT INTO images (session_id, image_index, filename, original_name, processed_filename, image_type) VALUES (?, ?, ?, ?, ?, ?)',
                (session_id, i + 1, q_data.get('image_url', ''), f"Question {question_number}", processed_filename, 'cropped' if processed_filename else 'original_url_only')
            )
            image_id = image_insert_result.lastrowid

            question_records.append((
                session_id, image_id, question_number, q_data['status'],
                q_data['marked_solution'], q_data['correct_solution'],
                q_data.get('subject'), q_data.get('chapter'), q_data.get('topic'), q_data.get('time_taken')
            ))

        conn.executemany(
            'INSERT INTO questions (session_id, image_id, question_number, status, marked_solution, actual_solution, subject, chapter, topic, time_taken) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
            question_records
        )

        # The session is committed before any PDF work, so a later PDF failure
        # leaves the session and its questions in place.
        conn.commit()

        response_data = {
            "status": "success",
            "message": "JSON v3.0 processed successfully."
        }

        if view_mode:
            query = "SELECT q.*, i.processed_filename FROM questions q JOIN images i ON q.image_id = i.id WHERE q.session_id = ? ORDER BY i.id"
            all_questions = [dict(row) for row in conn.execute(query, (session_id,)).fetchall()]

            if not all_questions:
                # The ValueError handler below performs the rollback.
                raise ValueError('No questions found for PDF generation.')

            pdf_output_folder = current_app.config.get('OUTPUT_FOLDER', 'output')
            os.makedirs(pdf_output_folder, exist_ok=True)

            pdf_filename = f"{secure_filename(test_name)}_{session_id[:8]}.pdf"

            create_a4_pdf_from_images(
                image_info=all_questions, base_folder=processed_folder, output_filename=pdf_filename,
                images_per_page=images_per_page, output_folder=pdf_output_folder,
                orientation=orientation
            )

            conn.execute(
                'INSERT INTO generated_pdfs (session_id, filename, subject, tags, notes, source_filename, user_id) VALUES (?, ?, ?, ?, ?, ?, ?)',
                (session_id, pdf_filename, test_name, test_mapping_id, 'Generated automatically via JSON v3.0 upload.', original_filename, user_id)
            )
            conn.commit()
            response_data['view_url'] = url_for('main.view_pdf', filename=pdf_filename, _external=True)
            response_data['message'] = "PDF auto-generated and saved."
        else:
            response_data['edit_url'] = url_for('main.question_entry_v2', session_id=session_id, test_name=test_name, _external=True)
            response_data['message'] = "Session created for manual review."

        return response_data

    except ValueError as e:
        conn.rollback()
        current_app.logger.error(f"JSON v3.0 processing error: {e}")
        raise  # Re-raise to be caught by the endpoint
    except sqlite3.Error as e:
        conn.rollback()
        current_app.logger.error(f"Database error during JSON v3.0 processing: {e}")
        # Chain the cause so the original traceback is not lost.
        raise ValueError(f"Database error: {e}") from e
    except Exception as e:
        conn.rollback()
        current_app.logger.error(f"Unhandled error during JSON v3.0 processing: {e}")
        raise ValueError(f"An unexpected error occurred: {e}") from e
    finally:
        conn.close()
DATABASE_PATH = 'database.db'
MIGRATIONS_DIR = 'migrations'
BACKUP_DIR = 'backups'


def backup_database():
    """Create a timestamped copy of the database file inside BACKUP_DIR.

    Returns:
        The path of the backup file, or None when DATABASE_PATH does not
        exist (a warning is printed in that case).
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(BACKUP_DIR, exist_ok=True)

    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    backup_filename = f"database_backup_{timestamp}.db"
    backup_path = os.path.join(BACKUP_DIR, backup_filename)

    try:
        # copy2 also preserves file metadata such as the modification time.
        shutil.copy2(DATABASE_PATH, backup_path)
    except FileNotFoundError:
        print(f"Warning: Database file not found at {DATABASE_PATH}. Cannot create backup.")
        return None

    print(f"Successfully created database backup at: {backup_path}")
    return backup_path
def apply_migration(migration_file, database_path=None):
    """Apply a single SQL migration file to the database.

    The script is split on ``;`` and executed statement by statement so that
    a "duplicate column name" error (column already added by an earlier run)
    can be downgraded to a warning while every other error aborts the run.

    Args:
        migration_file: Path of the ``.sql`` file to execute.
        database_path: SQLite file to migrate; defaults to DATABASE_PATH.

    Returns:
        True when every statement ran (or was skipped as a duplicate column)
        and the commit succeeded, False on any other SQLite error.
    """
    if database_path is None:
        database_path = DATABASE_PATH

    # Read the script before connecting so a missing file never leaves an
    # open connection behind.
    with open(migration_file, 'r') as f:
        sql_script = f.read()

    # Split script into individual statements
    statements = [s.strip() for s in sql_script.split(';') if s.strip()]

    conn = None
    try:
        conn = sqlite3.connect(database_path)
        cursor = conn.cursor()

        print(f"Applying migration: {migration_file}...")
        for statement in statements:
            try:
                cursor.execute(statement)
                print(f" Executed: {statement[:80]}...")
            except sqlite3.OperationalError as e:
                # This is a common error if the column already exists. We can treat it as a warning.
                if "duplicate column name" in str(e):
                    print(f" Warning: {e}. Skipping statement.")
                else:
                    raise  # Re-raise other operational errors

        conn.commit()
    except sqlite3.Error as e:
        print(f"Error applying migration {migration_file}: {e}")
        return False
    finally:
        # Close on success and on failure alike; previously the error path
        # returned without closing the connection.
        if conn is not None:
            conn.close()

    print(f"Successfully applied migration: {migration_file}")
    return True
def verify_migration(database_path=None):
    """Check that the v3 columns exist on the 'questions' and 'sessions' tables.

    Args:
        database_path: SQLite file to inspect; defaults to DATABASE_PATH.

    Returns:
        True when no expected column is missing from either table, False when
        any is missing or an SQLite error occurs.
    """
    if database_path is None:
        database_path = DATABASE_PATH

    expected_columns = {
        'questions': ['topic', 'time_taken', 'difficulty', 'source', 'test_id', 'test_mapping_id'],
        'sessions': ['test_id', 'test_mapping_id', 'source', 'metadata'],
    }

    print("\nVerifying migration...")
    conn = None
    try:
        conn = sqlite3.connect(database_path)
        cursor = conn.cursor()

        all_present = True
        # Table names come from the fixed mapping above, never from user input.
        for table, expected in expected_columns.items():
            cursor.execute(f"PRAGMA table_info({table});")
            # Column 1 of each PRAGMA table_info row is the column name.
            existing = {row[1] for row in cursor.fetchall()}
            missing = [col for col in expected if col not in existing]
            if missing:
                all_present = False
                print(f"❌ '{table}' table verification failed. Missing columns: {missing}")
            else:
                # The success marker was mis-encoded ("βœ…"); restored to match
                # the intact failure marker.
                print(f"✅ '{table}' table verification successful.")

        return all_present

    except sqlite3.Error as e:
        print(f"Error during verification: {e}")
        return False
    finally:
        # Close on both the success and the error path.
        if conn is not None:
            conn.close()


def main():
    """Run the migration: back up the database, apply the SQL file, verify."""
    print("--- Starting Database Migration ---")

    backup_path = backup_database()
    # A missing database is not an error (nothing to back up); a failed backup
    # of an existing database is.
    if not backup_path and os.path.exists(DATABASE_PATH):
        print("Aborting migration due to backup failure.")
        return

    migration_file = os.path.join(MIGRATIONS_DIR, 'add_v3_fields.sql')
    if not os.path.exists(migration_file):
        print(f"Error: Migration file not found at {migration_file}")
        return

    if apply_migration(migration_file):
        verify_migration()
    else:
        print("\nMigration failed. Please check the errors above.")
        print("You may need to restore from the backup if the database is in an inconsistent state.")

    print("--- Migration Process Finished ---")


if __name__ == "__main__":
    main()
def get_db_connection(database=None):
    """Open a SQLite connection whose rows can be indexed by column name.

    Args:
        database: SQLite file to open; defaults to the module-level DATABASE.
    """
    conn = sqlite3.connect(DATABASE if database is None else database)
    conn.row_factory = sqlite3.Row
    return conn


def migrate_subjective_questions(database=None):
    """Backfill ``question_json`` for subjective questions that lack it.

    Every row of ``subjective_questions`` whose ``question_json`` is NULL,
    empty or whitespace-only gets an EditorJS document containing a single
    paragraph block with the row's ``question_html``. All updates are
    committed together; on any error they are rolled back.

    Args:
        database: SQLite file to migrate; defaults to the module-level DATABASE.

    Returns:
        The number of rows updated, or None if the migration failed and was
        rolled back (the error is printed).
    """
    print("Starting migration of subjective questions...")
    conn = get_db_connection(database)

    try:
        # Fetch all subjective questions
        questions = conn.execute('SELECT * FROM subjective_questions').fetchall()

        updated_count = 0

        for q in questions:
            q_json = q['question_json']

            # Rows that already carry JSON content are left untouched.
            if q_json and q_json.strip():
                continue

            q_id = q['id']
            print(f"Migrating Question ID: {q_id}")

            # Create EditorJS block structure
            editor_js_data = {
                "time": int(time.time() * 1000),
                "blocks": [
                    {
                        "type": "paragraph",
                        "data": {
                            "text": q['question_html']
                        }
                    }
                ],
                "version": "2.22.2"  # Using a standard version
            }

            # Update the record
            conn.execute(
                'UPDATE subjective_questions SET question_json = ? WHERE id = ?',
                (json.dumps(editor_js_data), q_id)
            )
            updated_count += 1

        conn.commit()
        print(f"Migration completed. Updated {updated_count} questions.")
        return updated_count

    except Exception as e:
        # Script-level boundary: report the failure and undo partial updates.
        conn.rollback()
        print(f"Error during migration: {e}")
        return None
    finally:
        conn.close()


if __name__ == "__main__":
    migrate_subjective_questions()
def fetch_question_details(q_id):
    """Fetch one question's details from the API; used as a thread-pool worker.

    Returns the ``question`` object of the GraphQL response, or None when the
    request produced no result, the response has no ``data`` key, or the
    question is absent/empty.
    """
    response = run_hardcoded_query(query_template_step3, questionId=q_id)
    if not response or 'data' not in response:
        return None

    payload = response['data']
    if 'question' not in payload or not payload['question']:
        return None

    return payload['question']
@neetprep_bp.route('/neetprep')
@login_required
def index():
    """Render the main NeetPrep UI with per-topic question counts.

    Combines two sources keyed by topic/chapter name: NeetPrep question
    counts (only when the feature is enabled for the user) and the current
    user's own classified question counts. Both are optionally filtered by
    the ``subject`` query parameter.
    """
    selected_subject = request.args.get('subject', 'All')
    AVAILABLE_SUBJECTS = ["All", "Biology", "Chemistry", "Physics", "Mathematics"]

    neetprep_topic_counts = {}
    unclassified_count = 0

    conn = get_db_connection()
    try:
        if current_user.neetprep_enabled:
            # Get NeetPrep question counts per topic, filtered by subject
            if selected_subject != 'All':
                neetprep_topics_query = 'SELECT topic, COUNT(*) as count FROM neetprep_questions WHERE subject = ? GROUP BY topic'
                neetprep_topics = conn.execute(neetprep_topics_query, (selected_subject,)).fetchall()
            else:
                neetprep_topics_query = 'SELECT topic, COUNT(*) as count FROM neetprep_questions GROUP BY topic'
                neetprep_topics = conn.execute(neetprep_topics_query).fetchall()
            neetprep_topic_counts = {row['topic']: row['count'] for row in neetprep_topics}
            # This count is taken across all subjects, regardless of the filter.
            unclassified_count = conn.execute("SELECT COUNT(*) as count FROM neetprep_questions WHERE topic = 'Unclassified'").fetchone()['count']

        # Get classified question counts per chapter for the current user, filtered by subject
        query_params = [current_user.id]
        base_query = """
            SELECT q.chapter, COUNT(*) as count
            FROM questions q
            JOIN sessions s ON q.session_id = s.id
            WHERE s.user_id = ? AND q.subject IS NOT NULL AND q.chapter IS NOT NULL
        """
        if selected_subject != 'All':
            base_query += " AND q.subject = ? "
            query_params.append(selected_subject)

        base_query += " GROUP BY q.chapter"

        classified_chapters = conn.execute(base_query, query_params).fetchall()
    finally:
        # Release the connection even when one of the queries raises.
        conn.close()

    classified_chapter_counts = {row['chapter']: row['count'] for row in classified_chapters}

    # Combine the topics
    all_topics = set(neetprep_topic_counts) | set(classified_chapter_counts)
    combined_topics = [
        {
            'topic': topic,
            'neetprep_count': neetprep_topic_counts.get(topic, 0),
            'my_questions_count': classified_chapter_counts.get(topic, 0),
        }
        for topic in sorted(all_topics)
    ]

    return render_template('neetprep.html',
                           topics=combined_topics,
                           unclassified_count=unclassified_count,
                           available_subjects=AVAILABLE_SUBJECTS,
                           selected_subject=selected_subject,
                           neetprep_enabled=current_user.neetprep_enabled)
@neetprep_bp.route('/neetprep/sync', methods=['POST'])
@login_required
def sync_neetprep_data():
    """Pull newly completed test attempts and their incorrect questions.

    Steps: (1) optionally wipe the local tables when ``force`` is set,
    (2) page through the account's completed attempts, (3) collect the
    incorrect question ids of attempts not yet processed, (4) fetch details
    for question ids not already stored, using a thread pool, (5) insert the
    questions and mark the attempts as processed.

    Returns:
        A JSON ``status`` message with HTTP 200, or a JSON ``error`` with
        HTTP 500 if anything fails.
    """
    # A missing or non-JSON body is treated as "no options" rather than
    # failing on None.get() before the error handler is even entered.
    data = request.get_json(silent=True) or {}
    force_sync = data.get('force', False)
    print(f"NeetPrep sync started by user {current_user.id}. Force sync: {force_sync}")

    insert_attempt_sql = 'INSERT INTO neetprep_processed_attempts (attempt_id) VALUES (?)'
    conn = None
    try:
        conn = get_db_connection()

        if force_sync:
            print("Force sync enabled. Clearing processed attempts and questions tables.")
            conn.execute('DELETE FROM neetprep_processed_attempts')
            conn.execute('DELETE FROM neetprep_questions')
            conn.commit()

        processed_attempts_rows = conn.execute('SELECT attempt_id FROM neetprep_processed_attempts').fetchall()
        processed_attempt_ids = {row['attempt_id'] for row in processed_attempts_rows}

        all_attempt_ids = []
        offset = 0
        limit = 100
        print("Fetching test attempts from NeetPrep API...")
        while True:
            result = run_hardcoded_query(query_template_step1, limit=limit, offset=offset, userId=USER_ID)
            # An empty page (or a failed request) ends the pagination.
            if not result or 'data' not in result or not result['data'].get('testAttempts'):
                break
            attempts = result['data']['testAttempts']
            all_attempt_ids.extend(a['id'] for a in attempts if a.get('completed'))
            offset += limit
            time.sleep(0.2)  # small pause between paged API calls

        new_attempts = [aid for aid in all_attempt_ids if aid not in processed_attempt_ids]
        print(f"Found {len(new_attempts)} new attempts to process.")
        if not new_attempts:
            return jsonify({'status': 'No new test attempts to sync. Everything is up-to-date.'}), 200

        incorrect_question_ids = set()
        print("Fetching incorrect question IDs for new attempts...")
        for attempt_id in new_attempts:
            result = run_hardcoded_query(query_template_step2, attemptId=attempt_id)
            if result and 'data' in result and result['data'].get('incorrectQuestions'):
                for q in result['data']['incorrectQuestions']:
                    incorrect_question_ids.add(q['id'])
            time.sleep(0.2)

        existing_question_ids_rows = conn.execute('SELECT id FROM neetprep_questions').fetchall()
        existing_question_ids = {row['id'] for row in existing_question_ids_rows}
        new_question_ids = list(incorrect_question_ids - existing_question_ids)
        print(f"Found {len(new_question_ids)} new unique incorrect questions to fetch details for.")

        attempt_rows = [(attempt_id,) for attempt_id in new_attempts]

        if not new_question_ids:
            conn.executemany(insert_attempt_sql, attempt_rows)
            conn.commit()
            return jsonify({'status': 'Sync complete. No new questions found, but attempts log updated.'}), 200

        questions_to_insert = []
        total_new = len(new_question_ids)
        completed = 0
        print(f"Fetching details for {total_new} questions...")
        with ThreadPoolExecutor(max_workers=10) as executor:
            future_to_qid = {executor.submit(fetch_question_details, qid): qid for qid in new_question_ids}
            for future in as_completed(future_to_qid):
                q_data = future.result()
                if q_data:
                    topic_name = "Unclassified"
                    try:
                        topic_name = q_data['topics']['edges'][0]['node']['name']
                    except (IndexError, TypeError, KeyError):
                        # No topic attached to the question: keep the placeholder.
                        pass

                    questions_to_insert.append((q_data.get('id'), q_data.get('question'), json.dumps(q_data.get('options', [])), q_data.get('correctOptionIndex'), q_data.get('level', 'N/A'), topic_name, "Unclassified"))

                completed += 1
                percentage = int((completed / total_new) * 100)
                sys.stdout.write(f'\rSync Progress: {completed}/{total_new} ({percentage}%)')
                sys.stdout.flush()

        print("\nAll questions fetched.")

        if questions_to_insert:
            conn.executemany("INSERT INTO neetprep_questions (id, question_text, options, correct_answer_index, level, topic, subject) VALUES (?, ?, ?, ?, ?, ?, ?)", questions_to_insert)

        conn.executemany(insert_attempt_sql, attempt_rows)
        conn.commit()

        return jsonify({'status': f'Sync complete. Added {len(questions_to_insert)} new questions.'}), 200

    except Exception as e:
        current_app.logger.error(f"Error during NeetPrep sync: {repr(e)}")
        return jsonify({'error': f"A critical error occurred during sync: {repr(e)}"}), 500
    finally:
        # Single cleanup point for every return path and for failures.
        if conn is not None:
            conn.close()
@neetprep_bp.route('/neetprep/classify', methods=['POST'])
@login_required
def classify_unclassified_questions():
    """Classify every question whose topic is 'Unclassified', in batches of 10.

    Each batch is sent to the classifier selected by the user's
    ``classifier_model`` attribute ('nova' selects Nova, anything else
    Gemini). For each returned item with a valid 1-based ``index`` and a
    ``chapter_title``, the matching question's topic is updated. A failing
    batch is logged and skipped; the remaining batches still run.

    Returns:
        A JSON ``status`` message with the number of updated questions.
    """
    print("Starting classification of 'Unclassified' questions.")
    conn = get_db_connection()
    try:
        unclassified_questions = conn.execute("SELECT id, question_text FROM neetprep_questions WHERE topic = 'Unclassified'").fetchall()
        total_to_classify = len(unclassified_questions)

        if total_to_classify == 0:
            return jsonify({'status': 'No unclassified questions to process.'})

        batch_size = 10
        num_batches = math.ceil(total_to_classify / batch_size)
        total_classified_count = 0

        # Choose classifier based on user preference; it cannot change during
        # the request, so resolve it once instead of once per batch.
        if getattr(current_user, 'classifier_model', 'gemini') == 'nova':
            classify, model_name = classify_questions_with_nova, "Nova"
        else:
            classify, model_name = classify_questions_with_gemini, "Gemini"

        print(f"Found {total_to_classify} questions. Processing in {num_batches} batches of {batch_size}.")

        for i in range(num_batches):
            start_index = i * batch_size
            batch = unclassified_questions[start_index:start_index + batch_size]

            question_texts = [q['question_text'] for q in batch]
            question_ids = [q['id'] for q in batch]

            print(f"\nProcessing Batch {i+1}/{num_batches}...")

            try:
                print(f"Classifying with {model_name} API...")
                # start_index=0 keeps the returned indices relative to this batch.
                classification_result = classify(question_texts, start_index=0)

                if not classification_result or not classification_result.get('data'):
                    print(f"Batch {i+1} failed: {model_name} API did not return valid data.")
                    continue

                update_count_in_batch = 0
                for item in classification_result.get('data', []):
                    item_index = item.get('index')
                    if item_index is not None and 1 <= item_index <= len(question_ids):
                        # The item['index'] is 1-based, so we convert to 0-based
                        matched_id = question_ids[item_index - 1]
                        new_topic = item.get('chapter_title')
                        if new_topic:
                            conn.execute('UPDATE neetprep_questions SET topic = ? WHERE id = ?', (new_topic, matched_id))
                            update_count_in_batch += 1

                conn.commit()
                total_classified_count += update_count_in_batch
                print(f"Batch {i+1} complete. Classified {update_count_in_batch} questions.")

                # Wait before the next batch
                if i < num_batches - 1:
                    print("Waiting 6 seconds before next batch...")
                    time.sleep(6)

            except Exception as e:
                # Best effort: one failing batch must not abort the rest.
                print(f"\nAn error occurred during batch {i+1}: {repr(e)}")
                continue

        print(f"\nClassification finished. In total, {total_classified_count} questions were updated.")
        return jsonify({'status': f'Classification complete. Updated {total_classified_count} of {total_to_classify} questions.'})
    finally:
        # Release the connection on every path, including unexpected errors.
        conn.close()
@neetprep_bp.route('/neetprep/generate', methods=['POST'])
@login_required
def generate_neetprep_pdf():
    """Build a quiz page or a PDF from NeetPrep and user-classified questions.

    Reads ``type`` ('quiz', 'all' or 'selected') and ``topics`` (a JSON-encoded
    list) from the JSON body or the form data. For 'quiz' the collected
    questions are rendered with ``quiz_v2.html``; otherwise they are packed
    into a version "2.1" payload and handed to
    ``_process_json_and_generate_pdf``.

    Returns:
        The rendered quiz page, a JSON object with ``pdf_url`` on success, or
        a JSON ``error`` with status 400 (no topics), 404 (no questions) or
        500 (generation failure).
    """
    # Accept both JSON bodies and classic form posts.
    if request.is_json:
        data = request.json
    else:
        data = request.form

    pdf_type = data.get('type')
    topics_str = data.get('topics')
    # 'topics' arrives as a JSON string; a missing value or '[]' means no topics.
    topics = json.loads(topics_str) if topics_str and topics_str != '[]' else []

    conn = get_db_connection()
    all_questions = []

    # Only fetch NeetPrep questions if the feature is enabled for the user
    if current_user.neetprep_enabled:
        if pdf_type == 'quiz' and topics:
            # One '?' per topic keeps the IN (...) clause parameterized.
            placeholders = ', '.join('?' for _ in topics)
            neetprep_questions_from_db = conn.execute(f"SELECT * FROM neetprep_questions WHERE topic IN ({placeholders})", topics).fetchall()
            for q in neetprep_questions_from_db:
                try:
                    # NOTE(review): this literal contains only the question text; any
                    # surrounding HTML wrapper may have been lost - confirm against the repo.
                    html_content = f"""{q['question_text']}"""
                    img_path = os.path.join(current_app.config['TEMP_FOLDER'], f"neetprep_{q['id']}.jpg")
                    # Quiz mode works on images, so the HTML question is rendered to a JPEG.
                    imgkit.from_string(html_content, img_path, options={'width': 800})
                    all_questions.append({
                        'image_path': img_path,
                        'details': {'id': q['id'], 'options': json.loads(q['options']), 'correct_answer_index': q['correct_answer_index'], 'user_answer_index': None, 'source': 'neetprep', 'topic': q['topic'], 'subject': q['subject']}
                    })
                except Exception as e:
                    # A question that cannot be rendered is logged and left out of the quiz.
                    current_app.logger.error(f"Failed to convert NeetPrep question {q['id']} to image: {e}")

        elif pdf_type == 'all':
            neetprep_questions_from_db = conn.execute("SELECT * FROM neetprep_questions").fetchall()
            for q in neetprep_questions_from_db:
                all_questions.append({"id": q['id'], "question_text": q['question_text'], "options": json.loads(q['options']), "correct_answer_index": q['correct_answer_index'], "user_answer_index": None, "status": "wrong", "source": "neetprep", "custom_fields": {"difficulty": q['level'], "topic": q['topic'], "subject": q['subject']}})

        elif pdf_type == 'selected' and topics:
            placeholders = ', '.join('?' for _ in topics)
            neetprep_questions_from_db = conn.execute(f"SELECT * FROM neetprep_questions WHERE topic IN ({placeholders})", topics).fetchall()
            for q in neetprep_questions_from_db:
                all_questions.append({"id": q['id'], "question_text": q['question_text'], "options": json.loads(q['options']), "correct_answer_index": q['correct_answer_index'], "user_answer_index": None, "status": "wrong", "source": "neetprep", "custom_fields": {"difficulty": q['level'], "topic": q['topic'], "subject": q['subject']}})

    # Always fetch the user's own classified questions if topics are selected or if it's a quiz
    if topics or pdf_type == 'quiz':
        # If no topics are selected for a quiz/selection, this should not run or fetch all
        if not topics:
            # For a quiz, topics are mandatory. For 'selected', topics are mandatory.
            if pdf_type in ['quiz', 'selected']:
                conn.close()
                return jsonify({'error': 'No topics selected.'}), 400
        else:
            placeholders = ', '.join('?' for _ in topics)
            # Restrict to the current user's sessions so other users' questions never leak in.
            classified_questions_from_db = conn.execute(f"""
                SELECT q.* FROM questions q JOIN sessions s ON q.session_id = s.id
                WHERE q.chapter IN ({placeholders}) AND s.user_id = ?
            """, (*topics, current_user.id)).fetchall()
            for q in classified_questions_from_db:
                image_info = conn.execute("SELECT processed_filename FROM images WHERE id = ?", (q['image_id'],)).fetchone()
                # Questions without a processed image are skipped.
                if image_info and image_info['processed_filename']:
                    if pdf_type == 'quiz':
                        all_questions.append({'image_path': os.path.join(current_app.config['PROCESSED_FOLDER'], image_info['processed_filename']),'details': {'id': q['id'], 'options': [], 'correct_answer_index': q['actual_solution'], 'user_answer_index': q['marked_solution'], 'source': 'classified', 'topic': q['chapter'], 'subject': q['subject']}})
                    else:
                        # NOTE(review): question_text is an empty f-string here; it presumably
                        # once held markup referencing the processed image - confirm.
                        all_questions.append({"id": q['id'], "question_text": f"", "options": [], "correct_answer_index": q['actual_solution'], "user_answer_index": q['marked_solution'], "status": q['status'], "source": "classified", "custom_fields": {"subject": q['subject'], "chapter": q['chapter'], "question_number": q['question_number']}})

    # For 'all' type, also include user's classified questions
    if pdf_type == 'all':
        # NOTE(review): when type is 'all' AND topics are supplied, the block above has
        # already appended the matching classified questions, so they appear twice.
        classified_questions_from_db = conn.execute("""
            SELECT q.* FROM questions q JOIN sessions s ON q.session_id = s.id
            WHERE s.user_id = ? AND q.subject IS NOT NULL AND q.chapter IS NOT NULL
        """, (current_user.id,)).fetchall()
        for q in classified_questions_from_db:
            image_info = conn.execute("SELECT processed_filename FROM images WHERE id = ?", (q['image_id'],)).fetchone()
            if image_info and image_info['processed_filename']:
                all_questions.append({"id": q['id'], "question_text": f"", "options": [], "correct_answer_index": q['actual_solution'], "user_answer_index": q['marked_solution'], "status": q['status'], "source": "classified", "custom_fields": {"subject": q['subject'], "chapter": q['chapter'], "question_number": q['question_number']}})

    conn.close()

    if not all_questions:
        return jsonify({'error': 'No questions found for the selected criteria.'}), 404

    # Quiz mode renders straight to HTML; no PDF is produced.
    if pdf_type == 'quiz':
        return render_template('quiz_v2.html', questions=all_questions)

    test_name = "All Incorrect Questions"
    if pdf_type == 'selected':
        test_name = f"Incorrect Questions - {', '.join(topics)}"

    # Payload handed to the shared JSON-to-PDF pipeline.
    final_json_output = {
        "version": "2.1", "test_name": test_name,
        "config": { "font_size": 22, "auto_generate_pdf": False, "layout": data.get('layout', {}) },
        "metadata": { "source_book": "NeetPrep & Classified", "student_id": USER_ID, "tags": ", ".join(topics) },
        "questions": all_questions, "view": True
    }

    try:
        result, status_code = _process_json_and_generate_pdf(final_json_output, current_user.id)
        # Non-200 results from the pipeline are passed through unchanged.
        if status_code != 200:
            return jsonify(result), status_code

        if result.get('success'):
            return jsonify({'success': True, 'pdf_url': result.get('view_url')})
        else:
            return jsonify({'error': result.get('error', 'Failed to generate PDF via internal call.')}), 500
    except Exception as e:
        current_app.logger.error(f"Failed to call _process_json_and_generate_pdf: {repr(e)}")
        return jsonify({'error': str(e)}), 500
@neetprep_bp.route('/neetprep/edit')
@login_required
def edit_neetprep_questions():
    """Show the NeetPrep question editor.

    Loads every distinct topic and every question, and adds to each question
    a ``question_text_plain`` preview: the question's HTML reduced to text and
    cut to 100 characters followed by '...' when longer.
    """
    db = get_db_connection()
    topic_rows = db.execute('SELECT DISTINCT topic FROM neetprep_questions ORDER BY topic').fetchall()
    question_rows = db.execute('SELECT id, question_text, topic, subject FROM neetprep_questions ORDER BY id').fetchall()

    editable = []
    for row in question_rows:
        entry = dict(row)
        preview = BeautifulSoup(entry['question_text'], 'html.parser').get_text(strip=True)
        if len(preview) > 100:
            preview = preview[:100] + '...'
        entry['question_text_plain'] = preview
        editable.append(entry)

    db.close()

    topic_names = [row['topic'] for row in topic_rows]
    return render_template('neetprep_edit.html', questions=editable, topics=topic_names)
# The URL rule must declare <question_id>; without it Flask has no value to
# pass for the view's question_id parameter.
@neetprep_bp.route('/neetprep/update_question/<question_id>', methods=['POST'])
@login_required
def update_neetprep_question(question_id):
    """Update the topic and subject of one NeetPrep question.

    Args:
        question_id: Id of the ``neetprep_questions`` row, taken from the URL.

    Returns:
        ``{'success': True}`` on success, a JSON ``error`` with HTTP 400 when
        topic or subject is missing/empty, or HTTP 500 on a database error.
    """
    # This route modifies global neetprep data. In a real multi-user app,
    # this should be restricted to admin users. For now, @login_required is a basic protection.
    # A missing or non-JSON body falls through to the 400 below instead of
    # raising on None.get().
    data = request.get_json(silent=True) or {}
    new_topic = data.get('topic')
    new_subject = data.get('subject')

    if not new_topic or not new_subject:
        return jsonify({'error': 'Topic and Subject cannot be empty.'}), 400

    conn = None
    try:
        conn = get_db_connection()
        conn.execute(
            'UPDATE neetprep_questions SET topic = ?, subject = ? WHERE id = ?',
            (new_topic, new_subject, question_id)
        )
        conn.commit()
        return jsonify({'success': True})
    except Exception as e:
        current_app.logger.error(f"Error updating question {question_id}: {repr(e)}")
        return jsonify({'error': str(e)}), 500
    finally:
        # Close on success and on failure; the error path used to leak it.
        if conn is not None:
            conn.close()
def run_hardcoded_query(query_template, **kwargs):
    """Run one GraphQL query against the NeetPrep endpoint.

    Args:
        query_template: A ``str.format`` template for the query text.
        **kwargs: Values substituted into the template.

    Returns:
        The decoded JSON response, or None when the request fails, returns
        an HTTP error status, or the body is not valid JSON.
    """
    # Imported locally so this block does not depend on a new file-level import.
    import logging

    final_query = query_template.format(**kwargs)
    payload = {'query': final_query, 'variables': {}}
    try:
        response = requests.post(ENDPOINT_URL, headers=HEADERS, json=payload, timeout=30)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # This helper also runs inside ThreadPoolExecutor workers (via
        # fetch_question_details), where no Flask application context is
        # pushed; using current_app there would raise instead of logging.
        # ValueError covers a non-JSON body on requests versions whose
        # decode error is not a RequestException.
        logging.getLogger(__name__).error("NeetPrep API Request Error: %r", e)
        return None
{q}" for j, q in enumerate(questions)]) + + prompt = f""" +**System Role:** You are a question classifier for NEET/JEE exams, specialized in mapping questions to their corresponding subjects and chapters from the NCERT syllabus. + +Your task is to analyze each question, first classify it into the most relevant subject, and then identify the most relevant chapter(s) from the official syllabus structures provided below. + +**Available Subjects (Use these exact titles):** +- Biology +- Chemistry +- Physics +- Mathematics + +**Syllabus Chapters (Use these exact titles for the respective subjects):** + +--- +**1. BIOLOGY (Common for NEET & JEE)** + +**Class XI** +1. The Living World +2. Biological Classification +3. Plant Kingdom +4. Animal Kingdom +5. Morphology of Flowering Plants +6. Anatomy of Flowering Plants +7. Structural Organisation in Animals +8. Cell: The Unit of Life +9. Biomolecules +10. Cell Cycle and Cell Division +11. Photosynthesis in Higher Plants +12. Respiration in Plants +13. Plant Growth and Development +14. Breathing and Exchange of Gases +15. Body Fluids and Circulation +16. Excretory Products and their Elimination +17. Locomotion and Movement +18. Neural Control and Coordination +19. Chemical Coordination and Integration +20. Sexual Reproduction in Flowering Plants +21. Human Reproduction +22. Reproductive Health +23. Principles of Inheritance and Variation +24. Molecular Basis of Inheritance +25. Evolution +26. Health and Disease +27. Improvement in Food Production +28. Microbes in Human Welfare +29. Biotechnology - Principles and Processes +30. Biotechnology and Its Applications +31. Organisms and Populations +32. Ecosystem +33. Biodiversity and Its Conservation + +--- +**2. CHEMISTRY (Common for NEET & JEE)** + +**Class XI** +1. Some Basic Concepts of Chemistry +2. Structure of Atom +3. Classification of Elements and Periodicity in Properties +4. Chemical Bonding and Molecular Structure +5. States of Matter: Gases and Liquids +6. 
Thermodynamics +7. Equilibrium +8. Redox Reactions +9. Hydrogen +10. The s-Block Elements +11. The p-Block Elements (Group 13 and 14) +12. Organic Chemistry – Some Basic Principles and Techniques (GOC) +13. Hydrocarbons +14. Environmental Chemistry + +**Class XII** +1. The Solid State +2. Solutions +3. Electrochemistry +4. Chemical Kinetics +5. Surface Chemistry +6. General Principles and Processes of Isolation of Elements (Metallurgy) +7. The p-Block Elements (Group 15 to 18) +8. The d- and f- Block Elements +9. Coordination Compounds +10. Haloalkanes and Haloarenes +11. Alcohols, Phenols and Ethers +12. Aldehydes, Ketones and Carboxylic Acids +13. Amines +14. Biomolecules +15. Polymers +16. Chemistry in Everyday Life + +--- +**3. PHYSICS (Common for NEET & JEE)** + +**Class XI** +1. Units and Measurements +2. Motion in a Straight Line +3. Motion in a Plane +4. Laws of Motion +5. Work, Energy and Power +6. System of Particles and Rotational Motion +7. Gravitation +8. Mechanical Properties of Solids +9. Mechanical Properties of Fluids +10. Thermal Properties of Matter +11. Thermodynamics +12. Kinetic Theory +13. Oscillations +14. Waves + +**Class XII** +1. Electric Charges and Fields +2. Electrostatic Potential and Capacitance +3. Current Electricity +4. Moving Charges and Magnetism +5. Magnetism and Matter +6. Electromagnetic Induction +7. Alternating Current +8. Electromagnetic Waves +9. Ray Optics and Optical Instruments +10. Wave Optics +11. Dual Nature of Radiation and Matter +12. Atoms +13. Nuclei +14. Semiconductor Electronics: Materials, Devices and Simple Circuits +15. Communication Systems + +--- +**4. MATHEMATICS (For JEE Only)** + +**Class XI** +1. Sets +2. Relations and Functions +3. Trigonometric Functions +4. Principle of Mathematical Induction +5. Complex Numbers and Quadratic Equations +6. Linear Inequalities +7. Permutations and Combinations +8. Binomial Theorem +9. Sequences and Series +10. Straight Lines +11. Conic Sections +12. 
Introduction to Three Dimensional Geometry +13. Limits and Derivatives +14. Mathematical Reasoning +15. Statistics +16. Probability + +**Class XII** +1. Relations and Functions +2. Inverse Trigonometric Functions +3. Matrices +4. Determinants +5. Continuity and Differentiability +6. Application of Derivatives +7. Integrals +8. Application of Integrals +9. Differential Equations +10. Vector Algebra +11. Three Dimensional Geometry +12. Linear Programming +13. Probability + +--- + +**Classification Guidelines:** + +1. **Primary Classification**: Identify the single most relevant subject, and then the most relevant chapter(s) within that subject, that directly addresses the question's core concept. +2. **Multi-Chapter Questions**: If a question explicitly spans 2-3 distinct chapters, include all relevant chapters. +3. **Confidence Scoring** (0.0 to 1.0): + * **1.0**: Perfect match + * **0.8-0.9**: Strong match + * **0.5-0.7**: Moderate match + * **Below 0.5**: Avoid unless unavoidable. +4. **Non-Syllabus Questions**: If a question is not from any of the provided subjects/chapters, set `subject` to 'Unclassified' and `chapter_title` to 'Unclassified'. + +**Critical Requirements:** + +- Use ONLY the subject titles exactly as listed above, or 'Unclassified'. +- Use ONLY the chapter titles exactly as listed above, or 'Unclassified'. +- Preserve the original question text completely. +- Output ONLY valid JSON. +- The "index" field MUST match the question number shown in the input (e.g., if the question is numbered "8.", then "index": 8). + +**Output JSON Schema:** + +```json +{{ + "data": [ + {{ + "index": 1, + "subject": "", + "chapter_index": , + "chapter_title": "", + "original_question_text": "", + "confidence": <0.0 to 1.0> + }} + ], + "success": [true] +}} +``` + +Now classify the following question(s): +``` +{input_text} +``` + +Output ONLY the JSON response, nothing else. 
+""" + + url = "https://openrouter.ai/api/v1/chat/completions" + headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + } + + request_body = { + "model": "amazon/nova-2-lite-v1:free", + "messages": [ + {"role": "user", "content": prompt} + ], + } + + print(f"Sending batch to Nova API with {len(questions)} questions.") + print(f"Sending request to Nova API. Body: {json.dumps(request_body, indent=2)}") # Full logging enabled + + try: + response = requests.post(url, headers=headers, json=request_body, timeout=300) + response.raise_for_status() + + print(f"Received raw response from Nova: {response.text}") # Full logging enabled + + # Parse the response JSON + response_json = response.json() + + # Extract the content from Nova's response + choices = response_json.get('choices', []) + if not choices: + print("Error: Nova API returned no choices.") + return None + + content = choices[0].get('message', {}).get('content', '') + if not content: + print("Error: Nova API returned empty content.") + return None + + # Nova often wraps JSON in markdown code blocks, so we need to extract it + content = content.strip() + + # Remove markdown code block markers if present + if content.startswith('```json'): + content = content[7:] # Remove ```json + elif content.startswith('```'): + content = content[3:] # Remove ``` + + if content.endswith('```'): + content = content[:-3] # Remove closing ``` + + content = content.strip() + + # Parse the JSON from the content + batch_result = json.loads(content) + manager.mark_success('openrouter', key_index) + return batch_result + + except requests.exceptions.RequestException as e: + print(f"Error during Nova API call: {repr(e)}", file=sys.stderr) + print(f"Response body: {e.response.text if e.response else 'N/A'}", file=sys.stderr) + manager.mark_failure('openrouter', key_index) + return None + except (json.JSONDecodeError, KeyError, IndexError) as e: + print(f"Error parsing Nova response: {repr(e)}", 
file=sys.stderr) + print(f"Raw response text: {response.text if 'response' in locals() else 'N/A'}", file=sys.stderr) + manager.mark_failure('openrouter', key_index) + return None diff --git a/pre-migration-report.md b/pre-migration-report.md new file mode 100644 index 0000000000000000000000000000000000000000..6f0fef0667c43be5a3e0f21d9dff3de0932da0c3 --- /dev/null +++ b/pre-migration-report.md @@ -0,0 +1,368 @@ +# Pre-Migration Report: Single-User to Multi-User Architecture + +This document outlines the necessary changes to migrate the DocuPDF application from a single-user to a multi-user architecture. The migration is designed to be completed in phases, ensuring that existing data is preserved and correctly associated with the primary user. + +--- + +## Phase 1: User Authentication Foundation + +This phase introduces the core concepts of users and authentication. + +### 1.1 New `users` Table + +A new table will be created to store user credentials. + +```sql +-- file: database.py (addition) +CREATE TABLE IF NOT EXISTS users ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + username TEXT NOT NULL UNIQUE, + email TEXT NOT NULL UNIQUE, + password_hash TEXT NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); +``` + +### 1.2 New File: `user_manager.py` + +A new file will handle user session management, password hashing, and provide the user model required by Flask-Login. 
+ +```python +# file: user_manager.py (new file) +from flask_login import LoginManager, UserMixin +from werkzeug.security import generate_password_hash, check_password_hash +from utils import get_db_connection + +class User(UserMixin): + def __init__(self, id, username, email, password_hash): + self.id = id + self.username = username + self.email = email + self.password_hash = password_hash + + @staticmethod + def get(user_id): + conn = get_db_connection() + user_row = conn.execute('SELECT * FROM users WHERE id = ?', (user_id,)).fetchone() + conn.close() + if user_row: + return User(user_row['id'], user_row['username'], user_row['email'], user_row['password_hash']) + return None + + @staticmethod + def get_by_username(username): + conn = get_db_connection() + user_row = conn.execute('SELECT * FROM users WHERE username = ?', (username,)).fetchone() + conn.close() + if user_row: + return User(user_row['id'], user_row['username'], user_row['email'], user_row['password_hash']) + return None + +def setup_login_manager(app): + login_manager = LoginManager() + login_manager.init_app(app) + login_manager.login_view = 'user_auth.login' # New blueprint for user auth + + @login_manager.user_loader + def load_user(user_id): + return User.get(user_id) + +# (Additional functions for creating users, etc. will be added here) +``` + +### 1.3 Application Setup (`app.py`) + +The main `app.py` will be updated to initialize the `LoginManager` and register the new authentication blueprint. + +```python +# file: app.py (changes) +# Current +def create_app(): + app = Flask(__name__) + # ... + # Register Blueprints + from routes import main_bp + # ... + app.register_blueprint(main_bp) + return app + +# After +from flask_login import LoginManager + +def create_app(): + app = Flask(__name__) + app.config['SECRET_KEY'] = os.urandom(24) # Important for session security + # ... 
+ + # Setup Login Manager + from user_manager import setup_login_manager + setup_login_manager(app) + + # Register Blueprints + from routes import main_bp + from user_auth_routes import auth_bp # New blueprint for login/register + # ... + app.register_blueprint(main_bp) + app.register_blueprint(auth_bp) + return app +``` + +### 1.4 New Templates: `login.html` and `register.html` + +New HTML templates will be created for the user login and registration forms. These will be standard forms with fields for username, password, and email. + +--- + +## Phase 2: Database and Data Segregation + +This phase links all application data to specific users. + +### 2.1 Database Schema Changes + +The following tables will be altered to include a `user_id` foreign key. + +```sql +-- file: database.py (migrations) + +-- Add user_id to sessions +ALTER TABLE sessions ADD COLUMN user_id INTEGER REFERENCES users(id); + +-- Add user_id to generated_pdfs +ALTER TABLE generated_pdfs ADD COLUMN user_id INTEGER REFERENCES users(id); + +-- Add user_id to folders +ALTER TABLE folders ADD COLUMN user_id INTEGER REFERENCES users(id); +``` + +### 2.2 Backend Logic Changes (Code-by-Code) + +All routes and functions that interact with user-specific data must be updated. This will be done by using the `current_user` object provided by Flask-Login after a user logs in. All routes will also be protected with the `@login_required` decorator. + +#### **`routes.py`** + +##### `v2_upload()` + +**Current:** +```python +@main_bp.route('/v2/upload', methods=['POST']) +def v2_upload(): + session_id = str(uuid.uuid4()) + # ... + conn = get_db_connection() + conn.execute('INSERT INTO sessions (id, original_filename, name) VALUES (?, ?, ?)', (session_id, original_filename, original_filename)) + # ... +``` + +**After:** +```python +from flask_login import login_required, current_user + +@main_bp.route('/v2/upload', methods=['POST']) +@login_required +def v2_upload(): + session_id = str(uuid.uuid4()) + # ... 
+ conn = get_db_connection() + conn.execute('INSERT INTO sessions (id, original_filename, name, user_id) VALUES (?, ?, ?, ?)', + (session_id, original_filename, original_filename, current_user.id)) + # ... +``` + +##### `question_entry_v2(session_id)` + +**Current:** +```python +@main_bp.route('/question_entry_v2/') +def question_entry_v2(session_id): + conn = get_db_connection() + session_data = conn.execute( + 'SELECT original_filename, subject, tags, notes FROM sessions WHERE id = ?', (session_id,) + ).fetchone() + #... +``` + +**After:** +```python +from flask_login import login_required, current_user + +@main_bp.route('/question_entry_v2/') +@login_required +def question_entry_v2(session_id): + conn = get_db_connection() + # Add user_id check to prevent unauthorized access + session_data = conn.execute( + 'SELECT original_filename, subject, tags, notes FROM sessions WHERE id = ? AND user_id = ?', + (session_id, current_user.id) + ).fetchone() + if not session_data: + return "Unauthorized", 403 + #... +``` +*(Note: This pattern of adding `@login_required` and `AND user_id = ?` to queries will be repeated for almost every route in `routes.py`, `dashboard.py`, `json_processor.py`, etc. The examples above illustrate the core change.)* + +#### **`dashboard.py`** + +##### `dashboard()` + +**Current:** +```python +@dashboard_bp.route('/dashboard') +def dashboard(): + conn = get_db_connection() + sessions_rows = conn.execute(""" + SELECT s.id, ... + FROM sessions s + ... + """).fetchall() + #... +``` + +**After:** +```python +from flask_login import login_required, current_user + +@dashboard_bp.route('/dashboard') +@login_required +def dashboard(): + conn = get_db_connection() + sessions_rows = conn.execute(""" + SELECT s.id, ... + FROM sessions s + LEFT JOIN images i ON s.id = i.session_id + WHERE s.user_id = ? + GROUP BY s.id, ... + ORDER BY s.created_at DESC + """, (current_user.id,)).fetchall() + #... 
+``` + +--- + +## Phase 3: Security and UI + +This phase focuses on the user-facing elements and securing file access. + +### 3.1 UI Navigation (`_nav_links.html`) + +The navigation links will be updated to show context-aware links for login, registration, and logout. + +**Current:** +```html + + +``` + +**After:** +```html + + +``` + +### 3.2 Secure File Access (`routes.py`) + +Routes that serve files directly must check for ownership before sending the file. + +##### `download_file(filename)` + +**Current:** +```python +# file: routes.py +@main_bp.route('/download/') +def download_file(filename): + return send_file(os.path.join(current_app.config['OUTPUT_FOLDER'], filename), as_attachment=True) +``` + +**After:** +```python +# file: routes.py +from flask_login import login_required, current_user + +@main_bp.route('/download/') +@login_required +def download_file(filename): + conn = get_db_connection() + # Check if the requested file belongs to the current user + pdf_owner = conn.execute( + 'SELECT user_id FROM generated_pdfs WHERE filename = ?', (filename,) + ).fetchone() + conn.close() + + if pdf_owner and pdf_owner['user_id'] == current_user.id: + return send_file(os.path.join(current_app.config['OUTPUT_FOLDER'], filename), as_attachment=True) + else: + return "Unauthorized", 403 +``` + +--- + +## Data Migration Script (Conceptual) + +A one-time script will be created to migrate the existing data. + +```python +# file: migrate_to_multiuser.py (conceptual) +import sqlite3 +from werkzeug.security import generate_password_hash + +def migrate(): + conn = sqlite3.connect('database.db') + cursor = conn.cursor() + + # 1. 
Create a default user (credentials should be provided securely) + default_username = 'admin' # Or your preferred username + default_password = 'your_secure_password' + password_hash = generate_password_hash(default_password) + + try: + cursor.execute( + "INSERT INTO users (username, email, password_hash) VALUES (?, ?, ?)", + (default_username, 'admin@local.host', password_hash) + ) + user_id = cursor.lastrowid + print(f"Created default user '{default_username}' with ID {user_id}") + except sqlite3.IntegrityError: + print("Default user already exists.") + user_id = cursor.execute("SELECT id FROM users WHERE username = ?", (default_username,)).fetchone()[0] + + # 2. Add user_id columns (This should be done via ALTER TABLE statements first) + # ... + + # 3. Assign all existing data to the default user + tables_to_update = ['sessions', 'generated_pdfs', 'folders'] + for table in tables_to_update: + try: + cursor.execute(f"UPDATE {table} SET user_id = ? WHERE user_id IS NULL", (user_id,)) + print(f"Assigned {cursor.rowcount} records in '{table}' to user {user_id}") + except sqlite3.OperationalError as e: + print(f"Could not update table {table}. Maybe user_id column doesn't exist? 
def resize_image_if_needed(image_path: str) -> bytes:
    """Downscale an image to fit within 500x500 and return it as JPEG bytes.

    The image is never upscaled. The JPEG is re-encoded at decreasing quality
    (starting at 85, never below 50) until its base64 form is at most 180000
    characters, or the quality floor is reached.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The JPEG-encoded image bytes.
    """
    MAX_SIZE = 500
    MAX_BASE64_LEN = 180000
    START_QUALITY = 85
    MIN_QUALITY = 50
    # Modes Pillow's JPEG writer accepts directly; everything else (RGBA, LA,
    # P, ...) has to be converted first or save() raises OSError.
    JPEG_MODES = ('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr')

    def base64_len(raw: bytes) -> int:
        # Length of the padded base64 encoding, without building the string.
        return 4 * ((len(raw) + 2) // 3)

    with Image.open(image_path) as image:
        width, height = image.size

        # Clamp the longer side to MAX_SIZE and scale the other side with it.
        if width > height:
            new_width = min(width, MAX_SIZE)
            new_height = int(height * (new_width / width))
        else:
            new_height = min(height, MAX_SIZE)
            new_width = int(width * (new_height / height))

        # Very elongated images would otherwise round the short side to 0,
        # which Image.resize rejects.
        new_width = max(1, new_width)
        new_height = max(1, new_height)

        resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)

        if resized_image.mode not in JPEG_MODES:
            resized_image = resized_image.convert('RGB')

        def encode(quality: int) -> bytes:
            buffer = io.BytesIO()
            resized_image.save(buffer, format='JPEG', quality=quality, optimize=True)
            return buffer.getvalue()

        quality = START_QUALITY
        image_bytes = encode(quality)

        # Lower the quality proportionally to the overshoot. quality strictly
        # decreases each round, so the loop ends at MIN_QUALITY at the latest.
        while base64_len(image_bytes) > MAX_BASE64_LEN and quality > MIN_QUALITY:
            overshoot = MAX_BASE64_LEN / base64_len(image_bytes)
            quality = max(MIN_QUALITY, int(quality * overshoot))
            image_bytes = encode(quality)

        return image_bytes
def extract_question_number_from_ocr_result(ocr_result: dict) -> str:
    """Extract the question number from an OCR result.

    The text of every entry in ``ocr_result["data"][0]["text_detections"]`` is
    joined with spaces. If ``data`` is missing or empty, ``str(ocr_result)`` is
    searched instead. The number is taken from, in order:

    1. digits at the very start of the text (``"12. What is ..."``);
    2. digits following a ``Q`` / ``Q.`` / ``Q:`` / ``QUESTION`` marker
       (case-insensitive) at the start or after whitespace.

    Args:
        ocr_result: Decoded OCR response.

    Returns:
        The question number as a string of digits, or ``""`` when none is
        found or the result does not have the expected structure.
    """
    try:
        if "data" in ocr_result and len(ocr_result["data"]) > 0:
            text_detections = ocr_result["data"][0].get("text_detections", [])
            content = " ".join(
                detection["text_prediction"]["text"] for detection in text_detections
            )
        else:
            content = str(ocr_result)
    except (KeyError, IndexError, TypeError, AttributeError):
        # AttributeError covers a non-dict entry in "data" (no .get method).
        return ""

    # A former third pattern r'^\s*(\d+)[\.\)]' could never be reached: the
    # leading-digits pattern below already matches every string it matches.
    match = re.search(r'^\s*(\d+)', content)
    if match:
        return match.group(1)

    match = re.search(r'(?:^|\s)(?:[Qq][\.:]?\s*|QUESTION\s+)(\d+)', content, re.IGNORECASE)
    if match:
        return match.group(1)

    return ""
+ page = Image.new('RGB', img.size, 'white') + page.paste(img, (0, 0)) + pdf_pages.append(page) + except Exception as e: + print(f"Error opening or processing image {image_path}: {e}") + + if not pdf_pages: + return False + + # Save the first page and append the rest + pdf_pages[0].save( + output_filename, + "PDF", + save_all=True, + append_images=pdf_pages[1:], + resolution=resolution + ) + return True + except Exception as e: + print(f"Error saving final PDF: {e}") + return False + +def remove_color_from_image(image_path, target_colors, threshold, bg_mode, region_box=None): + """ + Removes specific colors from an image using CIELAB Delta E distance. + Uses manual RGB->Lab conversion to strictly match frontend JS logic (Standard CIELAB). + """ + # Read image (OpenCV loads as BGR) + img = cv2.imread(image_path, cv2.IMREAD_UNCHANGED) + if img is None: + raise ValueError(f"Could not read image: {image_path}") + + # Handle Alpha Channel + if img.shape[2] == 3: + img = cv2.cvtColor(img, cv2.COLOR_BGR2BGRA) + + # 1. PREPARE IMAGE (BGR -> RGB -> Normalized Float) + # We work on a copy for calculation + img_bgr = img[:, :, :3] + img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB) + + # Normalize to 0-1 for formula consistency with typical JS/CSS definitions + # (Frontend JS might be using 0-255 raw, let's verify frontend code provided earlier) + # Frontend code: r = rgb[0] / 255 ... + # Yes, frontend normalizes. + rgb_norm = img_rgb.astype(np.float32) / 255.0 + + # 2. RGB to XYZ (Vectorized) + # Formula matches JS: r = (r > 0.04045) ? ... + mask_linear = rgb_norm > 0.04045 + rgb_linear = np.where(mask_linear, np.power((rgb_norm + 0.055) / 1.055, 2.4), rgb_norm / 12.92) + + R, G, B = rgb_linear[:,:,0], rgb_linear[:,:,1], rgb_linear[:,:,2] + + X = R * 0.4124 + G * 0.3576 + B * 0.1805 + Y = R * 0.2126 + G * 0.7152 + B * 0.0722 + Z = R * 0.0193 + G * 0.1192 + B * 0.9505 + + # Scale XYZ + X /= 0.95047 + Y /= 1.00000 + Z /= 1.08883 + + # 3. 
XYZ to Lab + # Formula: x = (x > 0.008856) ? ... + xyz_stack = np.stack([X, Y, Z], axis=-1) + mask_xyz = xyz_stack > 0.008856 + f_xyz = np.where(mask_xyz, np.power(xyz_stack, 1/3), (7.787 * xyz_stack) + 16/116) + + fx, fy, fz = f_xyz[:,:,0], f_xyz[:,:,1], f_xyz[:,:,2] + + L_chn = (116.0 * fy) - 16.0 + a_chn = 500.0 * (fx - fy) + b_chn = 200.0 * (fy - fz) + + # 4. CALCULATE DISTANCE + # Threshold mapping matches frontend + max_delta_e = 110.0 - (float(threshold) * 100.0) + max_dist_sq = max_delta_e ** 2 + + final_keep_mask = np.zeros(L_chn.shape, dtype=bool) + + if target_colors: + # Convert Targets (RGB -> Lab) using same math + # Since targets are few, we can do simple loop or small array + for c in target_colors: + # Normalize + r, g, b = c['r']/255.0, c['g']/255.0, c['b']/255.0 + + # Linearize + r = ((r + 0.055) / 1.055) ** 2.4 if r > 0.04045 else r / 12.92 + g = ((g + 0.055) / 1.055) ** 2.4 if g > 0.04045 else g / 12.92 + b = ((b + 0.055) / 1.055) ** 2.4 if b > 0.04045 else b / 12.92 + + # XYZ + x = (r * 0.4124 + g * 0.3576 + b * 0.1805) / 0.95047 + y = (r * 0.2126 + g * 0.7152 + b * 0.0722) / 1.00000 + z = (r * 0.0193 + g * 0.1192 + b * 0.9505) / 1.08883 + + # Lab + fx = x ** (1/3) if x > 0.008856 else (7.787 * x) + 16/116 + fy = y ** (1/3) if y > 0.008856 else (7.787 * y) + 16/116 + fz = z ** (1/3) if z > 0.008856 else (7.787 * z) + 16/116 + + tL = (116.0 * fy) - 16.0 + ta = 500.0 * (fx - fy) + tb = 200.0 * (fy - fz) + + # Dist + dist_sq = (L_chn - tL)**2 + (a_chn - ta)**2 + (b_chn - tb)**2 + final_keep_mask |= (dist_sq <= max_dist_sq) + + # Handle Region Box + if region_box: + h, w = img.shape[:2] + rx = int(region_box['x'] * w) + ry = int(region_box['y'] * h) + rw = int(region_box['w'] * w) + rh = int(region_box['h'] * h) + + # Mask is TRUE everywhere EXCEPT the region (Keep outside) + region_protection_mask = np.ones(L_chn.shape, dtype=bool) + # Ensure coords are within bounds + ry = max(0, ry); rx = max(0, rx) + if rw > 0 and rh > 0: + 
def _json_candidates(text):
    """Yield substrings of *text* that may hold the JSON payload, best guess first."""
    # 1. The whole response, for models that return bare JSON.
    yield text

    # 2. The body of the first ```json fenced block.
    if "```json" in text:
        yield text.split("```json")[1].split("```")[0]

    # 3. The body of the first fenced block of any kind (bare ``` or ```JSON).
    parts = text.split("```")
    if len(parts) >= 3:
        fenced = parts[1].lstrip()
        if fenced[:4].lower() == "json":
            fenced = fenced[4:]
        yield fenced

    # 4. Everything from the first opening brace/bracket to its last closer,
    #    for JSON wrapped in explanatory prose or an unterminated fence.
    starts = [pos for pos in (text.find("{"), text.find("[")) if pos != -1]
    if starts:
        start = min(starts)
        closer = "}" if text[start] == "{" else "]"
        end = text.rfind(closer)
        if end > start:
            yield text[start:end + 1]


def extract_json_from_response(response_text):
    """Extract JSON from a Gemini response, handling code blocks and prose.

    Args:
        response_text: Raw model output. It may be bare JSON, JSON inside a
            Markdown code fence (with or without a ``json`` tag), or JSON
            surrounded by explanatory text.

    Returns:
        The decoded JSON value. If nothing can be decoded (including a
        non-string ``response_text``), a dict with ``status`` set to
        ``"error"`` plus ``message`` and ``error`` keys is returned instead of
        raising.
    """
    try:
        if not isinstance(response_text, str):
            raise TypeError(
                f"expected response text as str, got {type(response_text).__name__}"
            )

        last_error = None
        for candidate in _json_candidates(response_text):
            try:
                return json.loads(candidate)
            except json.JSONDecodeError as exc:
                last_error = exc
        # _json_candidates always yields the raw text, so last_error is set.
        raise last_error
    except (TypeError, ValueError) as e:  # JSONDecodeError is a ValueError
        logging.getLogger(__name__).error(f"JSON extraction error: {str(e)}")
        return {
            "status": "error",
            "message": "Failed to parse response JSON",
            "error": str(e)
        }
os.getenv("GEMINI_API_KEY") + if not api_key: + return { + "status": "error", + "message": "GEMINI_API_KEY not configured" + } + + client = genai.Client(api_key=api_key) + model = "gemini-flash-latest" + + prompt = f"""Current Date and Time (UTC): {current_time} +Current User's Login: {username} + +You are given an image file. Your task is to: + + Extract question numbers and their corresponding answers, forming pairs. + + Group them under appropriate sections, such as "Question Paper 1", "Section A", "Part B", etc., if such headers are present in the image. + + If no sections are present, just list the question–answer pairs normally. + + If the image is faulty, unclear, or does not contain extractable question-answer data, return a clear error in JSON. + + Do not output anything except a valid JSON object. + +Output Format: + +If sections are detected (try your level best to do so): + +{{ + "status": "success", + "data": {{ + "Section 1": [ + {{"question_number": "1", "answer": "B"}}, + {{"question_number": "2", "answer": "C"}} + ], + "Section 2": [ + {{"question_number": "1", "answer": "A"}}, + {{"question_number": "2", "answer": "D"}} + ] + }} +}} + +If no sections are detected: + +{{ + "status": "success", + "data": [ + {{"question_number": "1", "answer": "B"}}, + {{"question_number": "2", "answer": "C"}} + ] +}} + +If the image is faulty or data cannot be extracted: + +{{ + "status": "error", + "message": "Image is unreadable or does not contain question-answer data." 
+}} + +Ensure the output is strictly in JSON format with no additional explanations or text.""" + + result = client.models.generate_content( + model=model, + contents=[ + types.Content( + role="user", + parts=[ + types.Part.from_text(text=prompt), + types.Part.from_bytes( + data=base64.b64decode(image_base64), + mime_type="image/jpeg" + ), + ], + ) + ], + ) + + parsed_result = extract_json_from_response(result.text) + parsed_result.update({ + "metadata": { + "processed_at": current_time, + "processed_by": username + } + }) + return parsed_result + + except Exception as e: + logger.error(f"Image processing error: {str(e)}") + return { + "status": "error", + "message": "Failed to process image.", + "error": str(e), + "metadata": { + "processed_at": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"), + "processed_by": username + } + } + + +@qtab_bp.route('/qtab') +@qtab_bp.route('/qtab/') +@login_required +def qtab_list(folder_path=''): + """Display the question table interface with folder navigation.""" + conn = get_db_connection() + + # Folder Navigation Logic (same as subjective_list) + folder_id = None + breadcrumbs = [] + + if folder_path: + parts = folder_path.split('/') + parent_id = None + for i, part in enumerate(parts): + res = conn.execute( + "SELECT id FROM qtab_folders WHERE name = ? AND user_id = ? AND (parent_id = ? OR (? IS NULL AND parent_id IS NULL))", + (part, current_user.id, parent_id, parent_id) + ).fetchone() + if not res: + conn.close() + flash('Folder not found.', 'danger') + return redirect(url_for('qtab.qtab_list')) + parent_id = res['id'] + breadcrumbs.append({'name': part, 'path': '/'.join(parts[:i+1])}) + folder_id = parent_id + + # Fetch Subfolders + if folder_id: + subfolders = conn.execute( + 'SELECT * FROM qtab_folders WHERE parent_id = ? AND user_id = ? ORDER BY name', + (folder_id, current_user.id) + ).fetchall() + else: + subfolders = conn.execute( + 'SELECT * FROM qtab_folders WHERE parent_id IS NULL AND user_id = ? 
@qtab_bp.route('/qtab/upload', methods=['POST'])
@login_required
def qtab_upload():
    """Upload an image, extract question/answer pairs from it and store the result.

    Reads the ``image`` file and the optional ``folder_id`` field from the
    form. The values ``''`` and ``'null'`` for ``folder_id`` mean "no folder".

    Returns:
        JSON produced by ``process_image_for_questions`` with the new row's
        ``image_id`` added; 400 when no file is supplied; 404 when
        ``folder_id`` does not name a folder owned by the current user; 500 on
        any unexpected error.
    """
    if 'image' not in request.files:
        return jsonify({"status": "error", "message": "No image file provided."}), 400

    image = request.files['image']
    if image.filename == '':
        return jsonify({"status": "error", "message": "No selected file."}), 400

    folder_id = request.form.get('folder_id')
    if folder_id in ('null', ''):
        folder_id = None

    try:
        # folder_id comes from the client: make sure it is one of the current
        # user's folders before filing the image under it.
        if folder_id is not None:
            conn = get_db_connection()
            try:
                folder = conn.execute(
                    'SELECT id FROM qtab_folders WHERE id = ? AND user_id = ?',
                    (folder_id, current_user.id)
                ).fetchone()
            finally:
                conn.close()
            if folder is None:
                return jsonify({"status": "error", "message": "Folder not found."}), 404

        # secure_filename() can strip a name down to nothing (e.g. a name made
        # only of characters it rejects); fall back to a fixed stem.
        filename = secure_filename(image.filename) or 'upload'
        # Microseconds keep two same-named uploads within one second from
        # overwriting each other on disk.
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S_%f')
        saved_filename = f"qtab_{current_user.id}_{timestamp}_{filename}"

        # Create qtab folder if it doesn't exist
        qtab_folder = os.path.join(current_app.config['UPLOAD_FOLDER'], 'qtab')
        os.makedirs(qtab_folder, exist_ok=True)

        file_path = os.path.join(qtab_folder, saved_filename)
        image.save(file_path)

        # Process the image with Gemini
        result = process_image_for_questions(file_path, current_user.username)

        # Store in database; always release the connection, even if the
        # insert fails.
        conn = get_db_connection()
        try:
            cursor = conn.execute(
                '''INSERT INTO qtab_images
                   (user_id, folder_id, filename, original_name, result_json, status)
                   VALUES (?, ?, ?, ?, ?, ?)''',
                (current_user.id, folder_id, saved_filename, filename,
                 json.dumps(result), result.get('status', 'error'))
            )
            conn.commit()
            image_id = cursor.lastrowid
        finally:
            conn.close()

        result['image_id'] = image_id
        return jsonify(result)

    except Exception as e:
        logger.error(f"Upload error: {str(e)}")
        return jsonify({
            "status": "error",
            "message": "An error occurred.",
            "error": str(e),
            "metadata": {
                "processed_at": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"),
                "processed_by": current_user.username
            }
        }), 500
@qtab_bp.route('/qtab/image/<int:image_id>/rename', methods=['PUT'])
@login_required
def qtab_rename_image(image_id):
    """Rename a qtab image owned by the current user.

    Expects a JSON object with a non-empty ``name``.

    Returns:
        ``{'success': True}`` on success; a 400 JSON error when the name is
        missing or blank; 404 when the image does not exist or belongs to
        another user; 500 with the error text when the database call fails.
    """
    # request.json is None (or raises) for a missing/invalid body, and may
    # be a non-dict for valid JSON such as a list; normalise to a dict so
    # the validation below answers with a clean 400.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    new_name = str(data.get('name') or '').strip()

    if not new_name:
        return jsonify({'error': 'Name is required'}), 400

    conn = get_db_connection()
    try:
        # Check ownership
        image = conn.execute(
            'SELECT id FROM qtab_images WHERE id = ? AND user_id = ?',
            (image_id, current_user.id)
        ).fetchone()

        if not image:
            return jsonify({'error': 'Image not found or unauthorized'}), 404

        # The user_id filter repeats the ownership check inside the write
        # itself, so the update can never touch another user's row.
        conn.execute(
            'UPDATE qtab_images SET original_name = ? WHERE id = ? AND user_id = ?',
            (new_name, image_id, current_user.id)
        )
        conn.commit()
        return jsonify({'success': True})
    except Exception as e:
        conn.rollback()
        return jsonify({'error': str(e)}), 500
    finally:
        conn.close()
@qtab_bp.route('/qtab/serve/<filename>')
@login_required
def serve_qtab_image(filename):
    """Serve a stored qtab image after checking it belongs to the caller.

    Returns:
        The file from the ``qtab`` upload folder, or ``"Unauthorized"`` with
        status 403 when no image row with this filename is owned by the
        current user.
    """
    conn = get_db_connection()
    try:
        # Verify ownership
        image = conn.execute(
            'SELECT user_id FROM qtab_images WHERE filename = ?',
            (filename,)
        ).fetchone()
    finally:
        # Close even when the query raises.
        conn.close()

    if not image or image['user_id'] != current_user.id:
        return "Unauthorized", 403

    qtab_folder = os.path.join(current_app.config['UPLOAD_FOLDER'], 'qtab')
    return send_from_directory(qtab_folder, filename)
AND user_id = ?', + (image_id, current_user.id) + ).fetchone() + + if not image: + conn.close() + flash('Image not found or unauthorized', 'danger') + return redirect(url_for('qtab.qtab_list')) + + # Parse the result JSON to get all questions + result_json = json.loads(image['result_json']) if image['result_json'] else {} + + # Get all images in the same folder for navigation + if image['folder_id']: + all_images = conn.execute( + 'SELECT id, original_name, status FROM qtab_images WHERE folder_id = ? AND user_id = ? ORDER BY created_at', + (image['folder_id'], current_user.id) + ).fetchall() + else: + all_images = conn.execute( + 'SELECT id, original_name, status FROM qtab_images WHERE folder_id IS NULL AND user_id = ? ORDER BY created_at', + (current_user.id,) + ).fetchall() + + conn.close() + + return render_template( + 'qtab_exam.html', + image=dict(image), + result_json=result_json, + all_images=[dict(img) for img in all_images], + current_index=next((i for i, img in enumerate(all_images) if img['id'] == image_id), 0) + ) diff --git a/redact.py b/redact.py new file mode 100644 index 0000000000000000000000000000000000000000..eb61fd07cfd639ab7f4f207e29fdfb5ee71c1de1 --- /dev/null +++ b/redact.py @@ -0,0 +1,218 @@ +# main_redaction_processor.py + +# Required packages: pip install requests Pillow +import os +import requests +from PIL import Image, ImageDraw +import io +import base64 +import json + +# --- Configuration --- +# API endpoints should remain constant +INVOKE_URL_OCR = "https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr-v1" +INVOKE_URL_PARSER = "https://integrate.api.nvidia.com/v1/chat/completions" + +# Define a max pixel count for the parser model to avoid sending overly large images. 
+MAX_PIXELS_FOR_PARSER = 1024 * 1024 # 1 Megapixel + +# --- Internal Helper Functions --- + +def _get_average_color_from_regions(image: Image.Image, regions: list[tuple]): + """Calculates the average RGB color from a list of regions in an image.""" + total_r, total_g, total_b = 0, 0, 0 + pixel_count = 0 + img_width, img_height = image.size + if image.mode == 'RGBA': image = image.convert('RGB') + pixels = image.load() + for region in regions: + x1, y1, x2, y2 = [max(0, int(c)) for c in region] + x2 = min(img_width, x2); y2 = min(img_height, y2) + for x in range(x1, x2): + for y in range(y1, y2): + r, g, b = pixels[x, y] + total_r += r; total_g += g; total_b += b + pixel_count += 1 + if pixel_count == 0: return (0, 0, 0) + return (total_r // pixel_count, total_g // pixel_count, total_b // pixel_count) + + +def _detect_pictures_with_parser(image_to_process: Image.Image, api_key: str): + """Sends an image to the NemoRetriever Parser model to detect 'Picture' elements.""" + headers = {"Authorization": f"Bearer {api_key}", "Accept": "application/json"} + buffered = io.BytesIO() + image_to_process.save(buffered, format="PNG") + b64_str = base64.b64encode(buffered.getvalue()).decode("ascii") + + content = f'' + tool_name = "markdown_bbox" + payload = { + "model": "nvidia/nemoretriever-parse", + "messages": [{"role": "user", "content": content}], + "tools": [{"type": "function", "function": {"name": tool_name}}], + "tool_choice": {"type": "function", "function": {"name": tool_name}}, + "max_tokens": 2048, + } + + response = requests.post(INVOKE_URL_PARSER, headers=headers, json=payload, timeout=120) + response.raise_for_status() + response_json = response.json() + + picture_bboxes = [] + tool_calls = response_json.get('choices', [{}])[0].get('message', {}).get('tool_calls', []) + if tool_calls: + arguments_str = tool_calls[0].get('function', {}).get('arguments', '[]') + parsed_arguments = json.loads(arguments_str) + if parsed_arguments and isinstance(parsed_arguments, 
def _redact_text_in_image(input_image: Image.Image, api_key: str):
    """Send an image to the OCR endpoint and paint over every text box found.

    Each detected box is filled with the average color of a thin band
    sampled just outside its four edges, so the patch blends with its
    surroundings.

    Args:
        input_image: The (usually cropped) image to redact.
        api_key: Bearer token for the OCR endpoint.

    Returns:
        A redacted copy of ``input_image``. The original image is returned
        unchanged when the request fails or the response does not have the
        expected ``data[0].text_detections`` list.
    """
    headers = {"Authorization": f"Bearer {api_key}", "Accept": "application/json"}
    buffered = io.BytesIO()
    input_image.save(buffered, format="PNG")
    image_b64 = base64.b64encode(buffered.getvalue()).decode()

    payload = {"input": [{"type": "image_url", "url": f"data:image/png;base64,{image_b64}"}]}
    try:
        response = requests.post(INVOKE_URL_OCR, headers=headers, json=payload, timeout=60)
        response.raise_for_status()
        response_json = response.json()
    except (requests.exceptions.RequestException, ValueError):
        # ValueError covers a non-JSON body on requests versions whose
        # JSON error is not a RequestException.
        return input_image

    try:
        detections = response_json['data'][0]['text_detections']
    except (KeyError, IndexError, TypeError):
        return input_image
    if not isinstance(detections, list):
        return input_image

    image_with_redactions = input_image.copy()
    draw = ImageDraw.Draw(image_with_redactions)
    img_width, img_height = image_with_redactions.size
    # Width of the sampling band: 1% of the image diagonal, at least 1 px.
    radius = max(1, int(((img_width**2 + img_height**2)**0.5) / 100))

    for detection in detections:
        # Skip a single malformed detection instead of throwing away the
        # redactions already drawn for the others.
        try:
            points = detection["bounding_box"]["points"]
            xs = [pt['x'] * img_width for pt in points]
            ys = [pt['y'] * img_height for pt in points]
        except (KeyError, IndexError, TypeError, AttributeError):
            continue
        if not xs:
            continue

        # Use the axis-aligned bounds of all points rather than trusting
        # points[0]/points[2] to be top-left/bottom-right: a reversed or
        # rotated box would otherwise make ImageDraw.rectangle raise.
        left, right = min(xs), max(xs)
        top, bottom = min(ys), max(ys)

        sample_regions = [
            (left, top - radius, right, top),        # band above
            (left, bottom, right, bottom + radius),  # band below
            (left - radius, top, left, bottom),      # band to the left
            (right, top, right + radius, bottom),    # band to the right
        ]
        redaction_color = _get_average_color_from_regions(image_with_redactions, sample_regions)
        draw.rectangle([(left, top), (right, bottom)], fill=redaction_color)

    return image_with_redactions
redacts text within those pictures. + + Args: + image_source (str): The source of the image. Can be a local file path + or a base64 encoded string. + api_key (str): Your NVIDIA API key. + callback (callable, optional): A function to call with progress updates. + Defaults to None. The function should accept + a single string argument. + + Returns: + Image.Image: A PIL Image object with the text inside pictures redacted. + """ + + def _progress(message: str): + if callback: + callback(message) + + _progress("Step 1: Loading image...") + try: + if os.path.exists(image_source): + input_image = Image.open(image_source).convert("RGB") + else: + image_bytes = base64.b64decode(image_source) + input_image = Image.open(io.BytesIO(image_bytes)).convert("RGB") + except Exception as e: + raise ValueError(f"Invalid image_source: not a valid file path or base64 string. Error: {e}") + + # --- Resize if necessary for analysis --- + image_to_analyze = input_image + original_width, original_height = input_image.size + if (original_width * original_height) > MAX_PIXELS_FOR_PARSER: + _progress(f"Image is large, resizing for initial analysis...") + scale = (MAX_PIXELS_FOR_PARSER / (original_width * original_height))**0.5 + new_dims = (int(original_width * scale), int(original_height * scale)) + image_to_analyze = input_image.resize(new_dims, Image.Resampling.LANCZOS) + + # --- Detect Pictures --- + _progress("Step 2: Detecting 'Picture' elements...") + try: + picture_bboxes = _detect_pictures_with_parser(image_to_analyze, api_key) + except requests.exceptions.RequestException as e: + _progress(f"API Error during picture detection: {e}") + raise # Re-raise the exception after reporting progress + + if not picture_bboxes: + _progress("No 'Picture' elements were found. Returning original image.") + return input_image + + _progress(f"Step 3: Found {len(picture_bboxes)} 'Picture' element(s). 
Redacting text...") + final_image = input_image.copy() + + # --- Crop, Redact, and Paste --- + for i, box in enumerate(picture_bboxes): + _progress(f" - Processing picture {i + 1} of {len(picture_bboxes)}...") + x1 = int(box["xmin"] * original_width) + y1 = int(box["ymin"] * original_height) + x2 = int(box["xmax"] * original_width) + y2 = int(box["ymax"] * original_height) + + # Crop from the original, high-resolution image + cropped_element = input_image.crop((x1, y1, x2, y2)) + + redacted_crop = _redact_text_in_image(cropped_element, api_key) + + # Paste the redacted, high-resolution crop back + final_image.paste(redacted_crop, (x1, y1)) + + _progress("Step 4: Redaction process complete.") + return final_image + + +# --- Example Usage --- +if __name__ == "__main__": + + # Define a simple callback function to print progress to the console. + def print_progress(message: str): + print(f"[PROGRESS] {message}") + + # 1. Get API Key from environment variable + my_api_key = os.getenv("NVIDIA_API_KEY") + if not my_api_key: + print("ERROR: Please set the NVIDIA_API_KEY environment variable.") + else: + # 2. Define the path to your input image + # (replace with your actual image file) + input_image_path = "yolox1.png" # Make sure this image exists + + if not os.path.exists(input_image_path): + print(f"ERROR: Input image not found at '{input_image_path}'") + else: + print("--- Running Redaction on Image Path ---") + try: + # 3. Call the main function with the image path and callback + redacted_image = redact_pictures_in_image( + image_source=input_image_path, + api_key=my_api_key, + callback=print_progress + ) + + # 4. 
def _fill_notes_area(new_doc, page, rect, bg_color, pattern, pattern_color):
    """Paint *rect* on *page* with the notes background and optional pattern."""
    page.draw_rect(rect, color=None, fill=bg_color)
    if pattern:
        _draw_pattern(new_doc, page, rect, pattern, pattern_color)


def expand_pdf_for_notes(input_pdf, output_pdf, bg_color=(1, 1, 1), mode='notes_only', stitch_direction='horizontal', add_space=True, pattern=None, pattern_color=(0.8, 0.8, 0.8)):
    """
    Expand or rearrange a PDF for note-taking.

    Args:
        input_pdf (str): Path to input PDF file.
        output_pdf (str): Path to output PDF file.
        bg_color (tuple): RGB color for the notes area background.
        mode (str): The processing mode: 'notes_only', 'split', or 'stitch'.
            Any other value copies each page unchanged.
        stitch_direction (str): For 'stitch' mode, how to rearrange columns
            ('horizontal'; any other value is treated as vertical).
        add_space (bool): If True, add space for notes.
        pattern (str): Name of the pattern to draw ('grid', 'dots').
        pattern_color (tuple): RGB color for the pattern.

    Both documents are closed even when processing or saving raises; the
    exception itself is propagated to the caller.
    """
    doc = fitz.open(input_pdf)
    try:
        new_doc = fitz.open()
        try:
            for page_num in range(len(doc)):
                page = doc[page_num]
                orig_width = page.rect.width
                orig_height = page.rect.height
                half_width = orig_width / 2

                left_half_clip = fitz.Rect(0, 0, half_width, orig_height)
                right_half_clip = fitz.Rect(half_width, 0, orig_width, orig_height)

                if mode == 'split':
                    # One output page per half; each half is drawn in the
                    # left part, the notes area (if any) fills the right.
                    new_page_width = orig_width if add_space else half_width
                    for clip in (left_half_clip, right_half_clip):
                        half_page = new_doc.new_page(width=new_page_width, height=orig_height)
                        half_page.show_pdf_page(fitz.Rect(0, 0, half_width, orig_height), doc, page_num, clip=clip)
                        if add_space:
                            notes_rect = fitz.Rect(half_width, 0, orig_width, orig_height)
                            _fill_notes_area(new_doc, half_page, notes_rect, bg_color, pattern, pattern_color)

                elif mode == 'stitch':
                    if stitch_direction == 'horizontal':
                        new_width = orig_width * 2 if add_space else orig_width
                        new_page = new_doc.new_page(width=new_width, height=orig_height)
                        new_page.show_pdf_page(fitz.Rect(0, 0, half_width, orig_height), doc, page_num, clip=left_half_clip)
                        new_page.show_pdf_page(fitz.Rect(half_width, 0, orig_width, orig_height), doc, page_num, clip=right_half_clip)
                        if add_space:
                            notes_rect = fitz.Rect(orig_width, 0, new_width, orig_height)
                            _fill_notes_area(new_doc, new_page, notes_rect, bg_color, pattern, pattern_color)

                    else:  # vertical: right half goes below the left half
                        new_width = orig_width if add_space else half_width
                        new_height = orig_height * 2
                        new_page = new_doc.new_page(width=new_width, height=new_height)
                        new_page.show_pdf_page(fitz.Rect(0, 0, half_width, orig_height), doc, page_num, clip=left_half_clip)
                        new_page.show_pdf_page(fitz.Rect(0, orig_height, half_width, new_height), doc, page_num, clip=right_half_clip)
                        if add_space:
                            notes_rect = fitz.Rect(half_width, 0, new_width, new_height)
                            _fill_notes_area(new_doc, new_page, notes_rect, bg_color, pattern, pattern_color)

                elif mode == 'notes_only':
                    if add_space:
                        new_page = new_doc.new_page(width=orig_width * 2, height=orig_height)
                        right_rect = fitz.Rect(orig_width, 0, orig_width * 2, orig_height)
                        # Background first, then the source page on the left.
                        _fill_notes_area(new_doc, new_page, right_rect, bg_color, pattern, pattern_color)
                        new_page.show_pdf_page(fitz.Rect(0, 0, orig_width, orig_height), doc, page_num)
                    else:
                        new_doc.insert_pdf(doc, from_page=page_num, to_page=page_num)

                else:  # Default to copying the page if mode is unknown
                    new_doc.insert_pdf(doc, from_page=page_num, to_page=page_num)

            new_doc.save(output_pdf)
            total_pages = len(new_doc)
        finally:
            new_doc.close()
    finally:
        doc.close()

    print(f"✓ Successfully created: {output_pdf}")
    print(f"  Pages processed: {total_pages}")
def _draw_dots(doc, page, rect, spacing=20, radius=1, color=(0.8, 0.8, 0.8)):
    """Fill *rect* on *page* with a regular grid of dots.

    One dot is placed per ``spacing`` x ``spacing`` cell, offset by
    ``radius`` from the cell's top-left corner (the position the original
    stamp-based version aimed for).

    Args:
        doc: Document that owns *page*. Unused; kept so existing callers
            that pass it positionally keep working.
        page: Page to draw on.
        rect: Area to cover.
        spacing: Distance between neighbouring dots, in points.
        radius: Dot radius, in points.
        color: RGB color used for both outline and fill.
    """
    # The previous stamp approach called Document.get_xref(), which does
    # not exist, and handed an xref to show_pdf_page(), which needs a
    # source document. Drawing all circles on one Shape and committing once
    # needs no temporary document at all.
    shape = page.new_shape()
    drew_any = False
    for x in range(int(rect.x0), int(rect.x1), spacing):
        for y in range(int(rect.y0), int(rect.y1), spacing):
            shape.draw_circle(fitz.Point(x + radius, y + radius), radius)
            drew_any = True

    if not drew_any:
        # Nothing to paint for an empty or inverted rectangle.
        return

    shape.finish(color=color, fill=color)
    shape.commit()
def restore_from_backup(zip_filename='backup.zip', tmp_dir='tmp_restore'):
    """
    Restores the application state from a zip backup.

    The archive is extracted *before* any existing data is removed, so an
    unreadable backup leaves the current installation untouched.

    :param zip_filename: Name of the backup zip file.
    :param tmp_dir: Temporary directory to extract the backup.
    """
    import re  # local import: not part of this module's top-level imports

    if not os.path.exists(zip_filename):
        print(f"Backup file not found: {zip_filename}")
        return

    # Confirmation prompt
    confirm = input("This will wipe all existing data. Are you sure you want to continue? (y/n): ")
    if confirm.lower() != 'y':
        print("Restore operation cancelled.")
        return

    # 1. Extract the backup first; bail out before touching live data if
    #    the archive cannot be read.
    if os.path.exists(tmp_dir):
        shutil.rmtree(tmp_dir)
    os.makedirs(tmp_dir)

    print(f"Extracting {zip_filename} to {tmp_dir}...")
    try:
        with zipfile.ZipFile(zip_filename, 'r') as zipf:
            zipf.extractall(tmp_dir)
    except (zipfile.BadZipFile, OSError) as e:
        print(f"Could not read backup archive: {e}")
        shutil.rmtree(tmp_dir, ignore_errors=True)
        return

    # 2. Clean existing data
    print("Cleaning existing data...")
    for dir_to_clean in ['instance', 'output', 'processed', 'uploads']:
        if os.path.exists(dir_to_clean):
            shutil.rmtree(dir_to_clean)
        os.makedirs(dir_to_clean)

    # 3. Recreate database schema
    print("Setting up new database schema...")
    setup_database()

    # 4. Restore database from JSON files
    # Table and column names come from the backup's file names and JSON
    # keys and are interpolated into SQL, so only plain identifiers are
    # accepted.
    identifier = re.compile(r'[A-Za-z_][A-Za-z0-9_]*')
    conn = None  # bound up front so the finally block is safe if connect() fails
    try:
        conn = sqlite3.connect('instance/database.db')
        cursor = conn.cursor()

        json_files = [f for f in os.listdir(tmp_dir) if f.endswith('.json')]
        for json_file in json_files:
            table_name = os.path.splitext(json_file)[0]
            file_path = os.path.join(tmp_dir, json_file)

            if not identifier.fullmatch(table_name):
                print(f"Skipping file with invalid table name: {json_file}")
                continue

            print(f"Restoring table: {table_name}")
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            if not data:
                continue
            if not isinstance(data, list) or not isinstance(data[0], dict):
                print(f"Skipping {json_file}: expected a list of row objects.")
                continue

            columns = list(data[0].keys())
            if not all(identifier.fullmatch(str(col)) for col in columns):
                print(f"Skipping {json_file}: invalid column name.")
                continue

            placeholders = ', '.join('?' for _ in columns)
            query = f"INSERT INTO {table_name} ({', '.join(columns)}) VALUES ({placeholders})"
            cursor.executemany(query, ([row.get(col) for col in columns] for row in data))

        conn.commit()
        print("Database restore complete.")

    except sqlite3.Error as e:
        print(f"Database error during restore: {e}")
    finally:
        if conn:
            conn.close()

    # 5. Restore file directories
    print("Restoring file directories...")
    for dir_name in ['output', 'processed', 'uploads']:
        source_dir = os.path.join(tmp_dir, dir_name)
        dest_dir = dir_name
        if os.path.exists(source_dir):
            # Copy contents, not the directory itself
            for item in os.listdir(source_dir):
                s = os.path.join(source_dir, item)
                d = os.path.join(dest_dir, item)
                if os.path.isdir(s):
                    shutil.copytree(s, d, dirs_exist_ok=True)
                else:
                    shutil.copy2(s, d)
            print(f"Restored directory: {dir_name}")

    # 6. Clean up temporary directory
    shutil.rmtree(tmp_dir)
    print(f"Cleaned up temporary directory: {tmp_dir}")

    print("\nRestore complete!")
def process_pdf_background(session_id, user_id, original_filename, pdf_content, app_config):
    """Background task: save an uploaded PDF and split it into page images.

    Runs outside the request context, so it receives the raw PDF bytes and
    the app config explicitly and opens its own database connection.
    Progress is published through the module-level ``upload_progress``
    dict under ``session_id``.

    Args:
        session_id: Session the pages belong to; also the progress key.
        user_id: Owner of the session, used to look up the rendering DPI.
        original_filename: Name of the uploaded file (used in the saved
            file name).
        pdf_content: Raw bytes of the PDF.
        app_config: Mapping providing ``UPLOAD_FOLDER``.

    Errors are not raised; they are reported as
    ``{'status': 'error', 'message': ...}`` in ``upload_progress``.
    """
    upload_progress[session_id] = {'status': 'processing', 'progress': 0, 'message': 'Starting...'}

    # Bound before the try so the finally block can release whatever was
    # actually opened, instead of probing locals().
    conn = None
    doc = None
    try:
        conn = get_db_connection()  # fresh connection for this thread

        pdf_filename = f"{session_id}_{original_filename}"
        pdf_path = os.path.join(app_config['UPLOAD_FOLDER'], pdf_filename)

        with open(pdf_path, 'wb') as f:
            f.write(pdf_content)

        doc = fitz.open(pdf_path)
        total_pages = len(doc)
        upload_progress[session_id]['total'] = total_pages

        # Fetch user DPI - we need to query it since current_user proxy might not work in thread
        user_row = conn.execute("SELECT dpi FROM users WHERE id = ?", (user_id,)).fetchone()
        # Fall back to 150 for a missing user row and for a NULL dpi column.
        dpi = user_row['dpi'] if user_row and user_row['dpi'] else 150

        for i, page in enumerate(doc):
            pix = page.get_pixmap(dpi=dpi)
            page_filename = f"{session_id}_page_{i}.png"
            page_path = os.path.join(app_config['UPLOAD_FOLDER'], page_filename)
            pix.save(page_path)

            conn.execute(
                'INSERT INTO images (session_id, image_index, filename, original_name, image_type) VALUES (?, ?, ?, ?, ?)',
                (session_id, i, page_filename, f"Page {i+1}", 'original')
            )

            # Update progress
            progress = int(((i + 1) / total_pages) * 100)
            upload_progress[session_id].update({'progress': progress, 'message': f'Processed page {i+1}/{total_pages}'})

        conn.commit()

        # Keep 'total' in the final status so pollers still see the page count.
        upload_progress[session_id] = {
            'status': 'completed', 'progress': 100, 'total': total_pages, 'message': 'Done'
        }

    except Exception as e:
        print(f"Async processing error: {e}")
        upload_progress[session_id] = {'status': 'error', 'message': str(e)}
    finally:
        if doc is not None:
            doc.close()
        if conn is not None:
            conn.close()
# The view takes ``session_id``, so the rule must declare it as a URL variable.
@main_bp.route('/generate_color_rm_pdf/<session_id>')
@login_required
def generate_color_rm_pdf(session_id):
    """Build a PDF from a session's pages, preferring colour-removed versions.

    Query parameters ``start`` and ``end`` (1-based, inclusive, optional)
    restrict the page range. For each page the most recently stored
    ``color_rm`` image is used when one exists; otherwise the original page
    image is used. Pages whose file is missing on disk are skipped.

    Returns:
        A redirect to the PDF manager on success, a 403 JSON error when the
        session is not owned by the current user, 404 when no page image is
        available, or 500 when PDF creation reports failure.
    """
    conn = get_db_connection()
    try:
        session_data = conn.execute(
            'SELECT user_id, original_filename FROM sessions WHERE id = ?', (session_id,)
        ).fetchone()
        if not session_data or session_data['user_id'] != current_user.id:
            return jsonify({'error': 'Unauthorized'}), 403

        start_page = request.args.get('start', type=int)
        end_page = request.args.get('end', type=int)

        query = "SELECT * FROM images WHERE session_id = ? AND image_type = 'original'"
        params = [session_id]
        if start_page:
            query += " AND image_index >= ?"
            params.append(start_page - 1)  # image_index is 0-based
        if end_page:
            query += " AND image_index <= ?"
            params.append(end_page - 1)
        query += " ORDER BY image_index"

        images = conn.execute(query, params).fetchall()

        # One query instead of one per page. Rows are visited oldest-first, so
        # when a page has several color_rm rows the newest one wins.
        processed_rows = conn.execute(
            "SELECT image_index, processed_filename FROM images "
            "WHERE session_id = ? AND image_type = 'color_rm' ORDER BY id",
            (session_id,)
        ).fetchall()
        processed_by_index = {
            row['image_index']: row['processed_filename']
            for row in processed_rows
            if row['processed_filename']
        }

        pdf_image_paths = []
        for img in images:
            processed_name = processed_by_index.get(img['image_index'])
            if processed_name:
                path = os.path.join(current_app.config['PROCESSED_FOLDER'], processed_name)
            else:
                path = os.path.join(current_app.config['UPLOAD_FOLDER'], img['filename'])

            if os.path.exists(path):
                pdf_image_paths.append(path)

        if not pdf_image_paths:
            return "No images found to generate PDF", 404

        range_suffix = ""
        if start_page or end_page:
            range_suffix = f"_pg{start_page or 1}-{end_page or 'end'}"

        # Drop a trailing ".pdf" from the source name so the extension does
        # not end up in the middle of the generated filename.
        base_name = str(session_data['original_filename'])
        if base_name.lower().endswith('.pdf'):
            base_name = base_name[:-4]
        pdf_filename = f"Color_Removed_{base_name}{range_suffix}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf"

        output_path = os.path.join(current_app.config['OUTPUT_FOLDER'], pdf_filename)

        # Use the user's preferred color_rm_dpi when the user object has one.
        pdf_resolution = getattr(current_user, 'color_rm_dpi', 200.0)

        success = create_pdf_from_full_images(pdf_image_paths, output_path, resolution=pdf_resolution)
        if not success:
            return "Failed to generate PDF", 500

        conn.execute(
            'INSERT INTO generated_pdfs (session_id, filename, subject, user_id) VALUES (?, ?, ?, ?)',
            (session_id, pdf_filename, "Color Removal Export", current_user.id)
        )
        conn.commit()
        return redirect(url_for('main.pdf_manager'))
    finally:
        # Runs on every exit path, including exceptions from PDF creation.
        conn.close()
# The view takes ``pdf_id``, so the rule must declare it as a URL variable.
@main_bp.route('/process_final_pdf/<pdf_id>')
@login_required
def process_final_pdf(pdf_id):
    """Re-import one of the user's generated PDFs as a new cropping session.

    Looks up the PDF (restricted to the current user), creates a new session
    for it, renders every page to a PNG in the upload folder at the user's
    DPI and records the pages as ``'original'`` images.

    Args:
        pdf_id: ID of the row in ``generated_pdfs``.

    Returns:
        A redirect to the crop interface for the first page, or a redirect to
        the v2 index with a flash message when the PDF is not found, not owned
        by the user, or missing from disk.
    """
    conn = get_db_connection()
    doc = None
    try:
        # Security: only PDFs belonging to the current user are visible.
        pdf_info = conn.execute(
            'SELECT filename FROM generated_pdfs WHERE id = ? AND user_id = ?',
            (pdf_id, current_user.id)
        ).fetchone()

        if not pdf_info:
            flash("PDF not found or you don't have permission to access it.", "warning")
            return redirect(url_for('main.index_v2'))

        original_filename = pdf_info['filename']
        pdf_path = os.path.join(current_app.config['OUTPUT_FOLDER'], original_filename)

        if not os.path.exists(pdf_path):
            flash("PDF file is missing from disk.", "danger")
            return redirect(url_for('main.index_v2'))

        session_id = str(uuid.uuid4())

        conn.execute(
            'INSERT INTO sessions (id, original_filename, user_id) VALUES (?, ?, ?)',
            (session_id, original_filename, current_user.id)
        )

        doc = fitz.open(pdf_path)
        for i, page in enumerate(doc):
            pix = page.get_pixmap(dpi=current_user.dpi)
            page_filename = f"{session_id}_page_{i}.png"
            page_path = os.path.join(current_app.config['UPLOAD_FOLDER'], page_filename)
            pix.save(page_path)

            conn.execute(
                'INSERT INTO images (session_id, image_index, filename, original_name, image_type) VALUES (?, ?, ?, ?, ?)',
                (session_id, i, page_filename, f"Page {i+1}", 'original')
            )

        conn.commit()
        return redirect(url_for('main.crop_interface_v2', session_id=session_id, image_index=0))
    finally:
        # Release both resources even when rendering fails part-way; closing
        # the connection without a commit discards the half-built session.
        if doc is not None:
            doc.close()
        conn.close()
def _fetch_pdf_response(url, timeout=(10, 60)):
    """GET ``url`` (following redirects) and return the response; raise on HTTP errors.

    ``timeout`` is the (connect, read) timeout in seconds, so a stalled remote
    server cannot block the worker indefinitely.
    """
    # NOTE(review): the URL is user-supplied, so the server will contact an
    # arbitrary host on the user's behalf (SSRF surface). Consider restricting
    # the allowed destinations.
    response = requests.get(url, allow_redirects=True, timeout=timeout)
    response.raise_for_status()
    return response


@main_bp.route('/v2/upload', methods=['POST'])
@login_required
def v2_upload():
    """Create a session from a PDF supplied as a file, a URL or a cURL command.

    Exactly one source is used, checked in this order: the ``pdf`` file field,
    the ``pdf_url`` form field, then the first line of the ``curl_command``
    form field. The form field ``type`` sets the session type (default
    ``'standard'``). With ``?async=true`` page rendering is handed to a
    background thread; otherwise pages are rendered before responding.

    Returns:
        JSON with ``session_id`` plus either ``status: 'processing'`` (async)
        or the list of rendered page ``files`` (sync). Errors are returned as
        JSON with status 400 (bad input) or 500 (download/processing failure).
    """
    session_id = str(uuid.uuid4())
    pdf_content, original_filename = None, None
    conn = None
    doc = None

    try:
        # Case 1: Direct file upload
        if 'pdf' in request.files and request.files['pdf'].filename:
            file = request.files['pdf']
            if file and file.filename.lower().endswith('.pdf'):
                original_filename = secure_filename(file.filename)
                pdf_content = file.read()
            else:
                return jsonify({'error': 'Invalid file type, please upload a PDF'}), 400

        # Case 2: URL upload
        elif request.form.get('pdf_url'):
            pdf_url = convert_google_drive_url(request.form['pdf_url'])
            response = _fetch_pdf_response(pdf_url)

            # Prefer the server-provided name. It is remote-controlled text
            # that later becomes part of a filesystem path, so sanitize it.
            content_disposition = response.headers.get('Content-Disposition')
            if content_disposition:
                fname_match = re.search(r'filename="?([^";]+)"?', content_disposition)
                if fname_match:
                    original_filename = secure_filename(fname_match.group(1))

            if not original_filename:
                original_filename = secure_filename(os.path.basename(urlparse(pdf_url).path))

            if not original_filename or not original_filename.lower().endswith('.pdf'):
                original_filename = 'downloaded_document.pdf'

            pdf_content = response.content

        # Case 3: cURL command upload
        elif request.form.get('curl_command'):
            # Only the first command is handled for the analysis workflow.
            command = request.form['curl_command'].strip().split('\n')[0]
            url, filename = _parse_curl_command(command)
            if not url or not filename:
                return jsonify({'error': f"Could not parse cURL command: {command}"}), 400

            url = convert_google_drive_url(url)
            response = _fetch_pdf_response(url)
            # The -o argument is user text; strip path components from it.
            original_filename = secure_filename(filename) or 'downloaded_pdf.pdf'
            pdf_content = response.content

        else:
            return jsonify({'error': 'No PDF file, URL, or cURL command provided'}), 400

        if not pdf_content or not original_filename:
            return jsonify({'error': 'Failed to retrieve PDF content or filename'}), 500

        session_type = request.form.get('type', 'standard')
        conn = get_db_connection()
        conn.execute(
            'INSERT INTO sessions (id, original_filename, name, user_id, session_type) VALUES (?, ?, ?, ?, ?)',
            (session_id, original_filename, original_filename, current_user.id, session_type)
        )
        conn.commit()  # Commit session creation first

        if request.args.get('async') == 'true':
            # The worker opens its own connection; release ours before it starts.
            conn.close()
            conn = None
            app_config = current_app.config.copy()
            thread = threading.Thread(
                target=process_pdf_background,
                args=(session_id, current_user.id, original_filename, pdf_content, app_config)
            )
            thread.start()
            return jsonify({'session_id': session_id, 'status': 'processing'})

        # --- Sync processing logic ---
        pdf_filename = f"{session_id}_{original_filename}"
        pdf_path = os.path.join(current_app.config['UPLOAD_FOLDER'], pdf_filename)
        with open(pdf_path, 'wb') as f:
            f.write(pdf_content)

        doc = fitz.open(pdf_path)
        page_files = []
        for i, page in enumerate(doc):
            pix = page.get_pixmap(dpi=current_user.dpi)
            page_filename = f"{session_id}_page_{i}.png"
            page_path = os.path.join(current_app.config['UPLOAD_FOLDER'], page_filename)
            pix.save(page_path)

            conn.execute(
                'INSERT INTO images (session_id, image_index, filename, original_name, image_type) VALUES (?, ?, ?, ?, ?)',
                (session_id, i, page_filename, f"Page {i+1}", 'original')
            )
            page_files.append({'filename': page_filename, 'original_name': f"Page {i+1}", 'index': i})

        conn.commit()
        return jsonify({'session_id': session_id, 'files': page_files})

    except requests.RequestException as e:
        return jsonify({'error': f"Failed to download PDF from URL: {e}"}), 500
    except Exception as e:
        # ``conn`` is only non-None while it is still open, so this rollback
        # can never hit an already-closed connection.
        if conn is not None:
            conn.rollback()
        current_app.logger.error(f"An error occurred during v2 upload: {e}")
        return jsonify({'error': "An internal error occurred while processing the PDF."}), 500
    finally:
        if doc is not None:
            doc.close()
        if conn is not None:
            conn.close()
# The view takes ``session_id``, so the rule must declare it as a URL variable.
@main_bp.route('/api/session_images/<session_id>')
@login_required
def get_session_images(session_id):
    """Return the original pages of a session with their colour-removal status.

    Args:
        session_id: ID of the session; it must belong to the current user.

    Returns:
        JSON ``{'images': [...]}`` with one entry per original page, ordered
        by page index. Each entry has ``index``, ``page_number`` (index + 1),
        ``original_url``, ``processed_url`` (``None`` when the page has no
        colour-removed version) and ``is_processed``. Responds with a 403
        JSON error when the session is missing or not owned by the user.
    """
    conn = get_db_connection()
    try:
        # Security check
        session_owner = conn.execute('SELECT user_id FROM sessions WHERE id = ?', (session_id,)).fetchone()
        if not session_owner or session_owner['user_id'] != current_user.id:
            return jsonify({'error': 'Unauthorized'}), 403

        originals = conn.execute(
            "SELECT image_index, filename FROM images WHERE session_id = ? AND image_type = 'original' ORDER BY image_index",
            (session_id,)
        ).fetchall()

        # Oldest-first, so that when a page has several color_rm rows the
        # newest filename is the one left in the map below.
        processed = conn.execute(
            "SELECT image_index, processed_filename FROM images "
            "WHERE session_id = ? AND image_type = 'color_rm' ORDER BY id",
            (session_id,)
        ).fetchall()
    finally:
        # The fetched rows stay usable after the connection is closed.
        conn.close()

    processed_map = {row['image_index']: row['processed_filename'] for row in processed}

    images_list = []
    for img in originals:
        idx = img['image_index']
        p_filename = processed_map.get(idx)
        images_list.append({
            'index': idx,
            'page_number': idx + 1,
            'original_url': url_for('main.serve_image', folder='uploads', filename=img['filename']),
            'processed_url': url_for('main.serve_processed_file', filename=p_filename) if p_filename else None,
            'is_processed': bool(p_filename)
        })

    return jsonify({'images': images_list})
def _box_corners(box):
    """Return a box's corners as top-left, top-right, bottom-right, bottom-left points."""
    left, top = box['x'], box['y']
    right, bottom = left + box['w'], top + box['h']
    return [
        {'x': left, 'y': top},
        {'x': right, 'y': top},
        {'x': right, 'y': bottom},
        {'x': left, 'y': bottom},
    ]


def _stack_vertically(upper, lower):
    """Place ``upper`` above ``lower`` on a white canvas, each centred horizontally."""
    h1, w1 = upper.shape[:2]
    h2, w2 = lower.shape[:2]
    max_width = max(w1, w2)

    # assumes both crops are 3-channel images, matching the canvas — TODO confirm
    canvas = np.full((h1 + h2, max_width, 3), 255, dtype=np.uint8)

    x_offset1 = (max_width - w1) // 2
    canvas[0:h1, x_offset1:x_offset1 + w1] = upper

    x_offset2 = (max_width - w2) // 2
    canvas[h1:h1 + h2, x_offset2:x_offset2 + w2] = lower
    return canvas


@main_bp.route(ROUTE_PROCESS_CROP_V2, methods=[METHOD_POST])
@login_required
def process_crop_v2():
    """Crop the boxes drawn on one page and store them as question images.

    Expects a JSON body with ``session_id``, ``image_index`` (the page),
    ``boxes`` and ``imageData`` (a data URL of the page image to crop from).
    Each box provides ``id``, ``x``, ``y``, ``w``, ``h`` and optionally
    ``stitch_to`` (ID of a box on the same page it is appended below),
    ``remote_stitch_source`` (``{'page_index', 'box'}`` naming a box on
    another page that is placed above it) and the question fields
    ``question_number``, ``status``, ``marked_solution``, ``actual_solution``.

    Existing crops for the page are replaced. Only the first ``stitch_to``
    child of a box is used.

    Returns:
        JSON ``{'success': True, 'processed_count': n}``, or a JSON error with
        status 400 (malformed request), 403 (session not owned by the user),
        404 (page not found) or 500 (processing failure).
    """
    data = request.json or {}
    try:
        session_id = data['session_id']
        page_index = data['image_index']
        boxes_data = data['boxes']
    except (KeyError, TypeError):
        return jsonify({'error': 'session_id, image_index and boxes are required'}), 400
    image_data_url = data.get('imageData')

    conn = get_db_connection()
    temp_path = None  # set once the temporary page image has a path

    try:
        # Security: Check ownership of the session
        session_owner = conn.execute('SELECT user_id FROM sessions WHERE id = ?', (session_id,)).fetchone()
        if not session_owner or session_owner['user_id'] != current_user.id:
            return jsonify({'error': 'Unauthorized'}), 403

        page_info = conn.execute(
            "SELECT filename FROM images WHERE session_id = ? AND image_index = ? AND image_type = 'original'",
            (session_id, page_index)
        ).fetchone()
        if not page_info:
            return jsonify({'error': 'Original page not found in session'}), 404

        # A missing or malformed data URL is a client error, not a server fault.
        if not isinstance(image_data_url, str) or ',' not in image_data_url:
            return jsonify({'error': 'imageData must be a data URL'}), 400

        _, encoded = image_data_url.split(",", 1)
        image_data = base64.b64decode(encoded)

        temp_filename = f"temp_filtered_{page_info['filename']}"
        temp_path = os.path.join(current_app.config['PROCESSED_FOLDER'], temp_filename)
        with open(temp_path, "wb") as f:
            f.write(image_data)

        # Replace whatever was previously cropped from this page.
        existing_cropped = conn.execute(
            "SELECT id, processed_filename FROM images WHERE session_id = ? AND filename = ? AND image_type = 'cropped'",
            (session_id, page_info['filename'])
        ).fetchall()

        for cropped_img in existing_cropped:
            if cropped_img['processed_filename']:
                try:
                    os.remove(os.path.join(current_app.config['PROCESSED_FOLDER'], cropped_img['processed_filename']))
                except OSError:
                    pass
            conn.execute("DELETE FROM questions WHERE session_id = ? AND image_id = ?", (session_id, cropped_img['id']))

        conn.execute(
            "DELETE FROM images WHERE session_id = ? AND filename = ? AND image_type = 'cropped'",
            (session_id, page_info['filename'])
        )

        # Boxes on this page that serve as the stitch source of another box
        # must not also be saved as standalone questions.
        local_source_ids = set()
        for box in boxes_data:
            src = box.get('remote_stitch_source')
            if src and src.get('page_index') == page_index:
                local_source_ids.add(src['box']['id'])

        primary_boxes = [box for box in boxes_data if not box.get('stitch_to')]
        processed_boxes = []

        for i, primary_box in enumerate(primary_boxes):
            if primary_box['id'] in local_source_ids:
                continue

            source_info = primary_box.get('remote_stitch_source')

            # --- CROSS-PAGE STITCHING LOGIC ---
            if source_info:
                source_page_index = source_info['page_index']
                source_box = source_info['box']

                # Remove the source's own image/question rows (matched by the
                # frontend box ID) to prevent duplicates. The file itself is
                # left for general cleanup or overwrite.
                if 'id' in source_box:
                    source_img_row = conn.execute(
                        "SELECT id, processed_filename FROM images WHERE session_id = ? AND box_id = ?",
                        (session_id, str(source_box['id']))
                    ).fetchone()
                    if source_img_row:
                        conn.execute("DELETE FROM questions WHERE image_id = ?", (source_img_row['id'],))
                        conn.execute("DELETE FROM images WHERE id = ?", (source_img_row['id'],))

                source_page_db = conn.execute(
                    "SELECT filename FROM images WHERE session_id = ? AND image_index = ? AND image_type = 'original'",
                    (session_id, source_page_index)
                ).fetchone()

                if source_page_db:
                    source_path = os.path.join(current_app.config['UPLOAD_FOLDER'], source_page_db['filename'])
                    if os.path.exists(source_path):
                        # Parent comes from the original source page file,
                        # child from the submitted page image.
                        parent_crop = crop_image_perspective(source_path, _box_corners(source_box))
                        child_crop = crop_image_perspective(temp_path, _box_corners(primary_box))
                        stitched_image = _stack_vertically(parent_crop, child_crop)
                    else:
                        # Fallback if source file missing: just crop the child.
                        current_app.logger.error(f"Source file missing for stitch: {source_path}")
                        stitched_image = crop_image_perspective(temp_path, _box_corners(primary_box))
                else:
                    # Fallback if db lookup fails: just crop the child.
                    current_app.logger.error(f"Source page DB record missing: session {session_id} index {source_page_index}")
                    stitched_image = crop_image_perspective(temp_path, _box_corners(primary_box))

            # --- STANDARD LOCAL STITCHING OR SINGLE BOX LOGIC ---
            else:
                children = [box for box in boxes_data if box.get('stitch_to') == primary_box['id']]
                stitched_image = crop_image_perspective(temp_path, _box_corners(primary_box))
                if children:
                    child_crop = crop_image_perspective(temp_path, _box_corners(children[0]))
                    stitched_image = _stack_vertically(stitched_image, child_crop)

            crop_filename = f"processed_{session_id}_page{page_index}_crop{i}.jpg"
            crop_path = os.path.join(current_app.config['PROCESSED_FOLDER'], crop_filename)
            cv2.imwrite(crop_path, stitched_image)

            processed_boxes.append({
                'original_filename': page_info['filename'],
                'original_name': f"Page {page_index + 1} - Q{i + 1}",
                'processed_filename': crop_filename,
                'box_id': str(primary_box['id']),  # Stored for future stitching reference
                'question_number': primary_box.get('question_number'),
                'status': primary_box.get('status'),
                'marked_solution': primary_box.get('marked_solution'),
                'actual_solution': primary_box.get('actual_solution')
            })

        max_index_result = conn.execute('SELECT MAX(image_index) FROM images WHERE session_id = ?', (session_id,)).fetchone()
        next_index = (max_index_result[0] if max_index_result[0] is not None else -1) + 1

        for i, p_box in enumerate(processed_boxes):
            conn.execute(
                'INSERT INTO images (session_id, image_index, filename, original_name, processed_filename, image_type, box_id) VALUES (?, ?, ?, ?, ?, ?, ?)',
                (session_id, next_index + i, p_box['original_filename'], p_box['original_name'], p_box['processed_filename'], 'cropped', p_box['box_id'])
            )
            image_id = conn.execute('SELECT last_insert_rowid()').fetchone()[0]

            conn.execute(
                """INSERT INTO questions
                   (session_id, image_id, question_number, status, marked_solution, actual_solution)
                   VALUES (?, ?, ?, ?, ?, ?)""",
                (
                    session_id,
                    image_id,
                    p_box.get('question_number'),
                    # The 'status' key is always present (possibly None), so a
                    # .get() default would never apply; fall back explicitly.
                    p_box.get('status') or 'unattempted',
                    p_box.get('marked_solution'),
                    p_box.get('actual_solution')
                )
            )

        conn.commit()
        return jsonify({'success': True, 'processed_count': len(processed_boxes)})

    except Exception as e:
        conn.rollback()
        current_app.logger.exception(f"V2 Processing error: {e}")
        return jsonify({'error': f'Processing failed: {str(e)}'}), 500
    finally:
        conn.close()
        # The temporary page image is only needed while cropping; remove it
        # on failure as well as on success.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                pass
@main_bp.route('/process_color_rm', methods=['POST'])
@login_required
def process_color_rm():
    """Store a client-rendered colour-removed image for one page.

    Expects a JSON body with ``session_id``, ``image_index`` and ``imageData``
    (a data URL holding the processed image). The image is written to the
    processed folder and registered as the page's ``color_rm`` image, so that
    ``serve_processed_file`` will allow access to it. A page keeps a single
    ``color_rm`` row: an existing row is updated rather than duplicated, and
    the file it previously pointed to is removed.

    Returns:
        JSON with ``success``, ``filename`` and ``url`` of the stored image,
        or a JSON error with status 400 (bad ``imageData``), 403 (session not
        owned by the user), 404 (page not found) or 500.
    """
    data = request.json or {}
    session_id = data.get('session_id')
    image_index = data.get('image_index')
    image_data_url = data.get('imageData')

    conn = get_db_connection()
    try:
        # Security: Check ownership of the session
        session_owner = conn.execute('SELECT user_id FROM sessions WHERE id = ?', (session_id,)).fetchone()
        if not session_owner or session_owner['user_id'] != current_user.id:
            return jsonify({'error': 'Unauthorized'}), 403

        page_info = conn.execute(
            "SELECT filename, original_name FROM images WHERE session_id = ? AND image_index = ? AND image_type = 'original'",
            (session_id, image_index)
        ).fetchone()
        if not page_info:
            return jsonify({'error': 'Original page not found'}), 404

        # A missing or undecodable data URL is a client error.
        if not isinstance(image_data_url, str) or ',' not in image_data_url:
            return jsonify({'error': 'imageData must be a data URL'}), 400
        try:
            image_data = base64.b64decode(image_data_url.split(",", 1)[1])
        except ValueError:
            return jsonify({'error': 'imageData is not valid base64'}), 400

        processed_filename = f"color_rm_{session_id}_{image_index}_{datetime.now().strftime('%H%M%S')}.png"
        processed_path = os.path.join(current_app.config['PROCESSED_FOLDER'], processed_filename)

        with open(processed_path, "wb") as f:
            f.write(image_data)

        # Upsert, matching the batch route: keep one color_rm row per page.
        existing = conn.execute(
            "SELECT id, processed_filename FROM images "
            "WHERE session_id = ? AND image_index = ? AND image_type = 'color_rm' ORDER BY id DESC",
            (session_id, image_index)
        ).fetchone()

        if existing:
            conn.execute(
                "UPDATE images SET processed_filename = ?, filename = ? WHERE id = ?",
                (processed_filename, page_info['filename'], existing['id'])
            )
        else:
            conn.execute(
                'INSERT INTO images (session_id, image_index, filename, original_name, processed_filename, image_type) VALUES (?, ?, ?, ?, ?, ?)',
                (session_id, image_index, page_info['filename'], page_info['original_name'], processed_filename, 'color_rm')
            )
        conn.commit()

        # The superseded file is no longer referenced by any row; remove it
        # best-effort (unless the new file reused the same name).
        if existing and existing['processed_filename'] and existing['processed_filename'] != processed_filename:
            try:
                os.remove(os.path.join(current_app.config['PROCESSED_FOLDER'], existing['processed_filename']))
            except OSError:
                pass

        return jsonify({
            'success': True,
            'filename': processed_filename,
            'url': url_for('main.serve_processed_file', filename=processed_filename)
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500
    finally:
        conn.close()
AND i.image_type = 'cropped' + ORDER BY i.id""", + (session_id,) + ).fetchall() + + # Count classified questions (those with both subject and chapter) + classification_count = conn.execute( + """SELECT COUNT(*) as count + FROM images i + LEFT JOIN questions q ON i.id = q.image_id + WHERE i.session_id = ? AND i.image_type = 'cropped' + AND q.subject IS NOT NULL AND q.chapter IS NOT NULL""", + (session_id,) + ).fetchone()['count'] + + classified_count = classification_count + + conn.close() + + if not images: + return "No questions were created from the PDF. Please go back and draw crop boxes.", 404 + + return render_template('question_entry_v2.html', + session_id=session_id, + images=[dict(img) for img in images], + session_data=dict(session_data) if session_data else {}, + classified_count=classified_count, + total_questions=len(images), + nvidia_nim_available=NVIDIA_NIM_AVAILABLE) + +@main_bp.route('/old/dashboard') +@login_required +def old_dashboard(): + # Redirect to the main dashboard to avoid duplicate code + return redirect(url_for('dashboard.dashboard')) + +@main_bp.route('/delete_session/', methods=[METHOD_DELETE]) +@login_required +def delete_session(session_id): + try: + conn = get_db_connection() + # Security: Check ownership of the session + session_owner = conn.execute('SELECT user_id FROM sessions WHERE id = ?', (session_id,)).fetchone() + if not session_owner or session_owner['user_id'] != current_user.id: + conn.close() + return jsonify({'error': 'Unauthorized'}), 403 + + images_to_delete = conn.execute('SELECT filename, processed_filename FROM images WHERE session_id = ?', (session_id,)).fetchall() + for img in images_to_delete: + if img['filename']: + try: os.remove(os.path.join(current_app.config['UPLOAD_FOLDER'], img['filename'])) + except OSError: pass + if img['processed_filename']: + try: os.remove(os.path.join(current_app.config['PROCESSED_FOLDER'], img['processed_filename'])) + except OSError: pass + + conn.execute('DELETE FROM questions 
# NOTE(review): the URL variable is required because the view takes
# ``session_id``; the converter (plain string) is an assumption - confirm
# against the session id format used elsewhere in the app.
@main_bp.route('/toggle_persist/<session_id>', methods=[METHOD_POST])
@login_required
def toggle_persist(session_id):
    """Flip the ``persist`` flag of a session owned by the current user.

    The new flag value is also written to one ``generated_pdfs`` row that
    references the session, if such a row exists.

    Args:
        session_id: Primary key of the row in ``sessions``.

    Returns:
        JSON ``{'success': True, 'status': 'persisted' | 'not_persisted'}`` on
        success, ``403`` when the session does not exist or belongs to another
        user, ``500`` with the error text on any other failure.
    """
    # Initialise first so the error handler can tell whether a connection
    # was ever opened (get_db_connection itself may raise).
    conn = None
    try:
        conn = get_db_connection()

        # One query gives both the owner (for the security check) and the
        # current flag value.
        session_row = conn.execute(
            'SELECT user_id, persist FROM sessions WHERE id = ?', (session_id,)
        ).fetchone()
        if not session_row or session_row['user_id'] != current_user.id:
            return jsonify({'error': 'Unauthorized'}), 403

        # A NULL flag is treated as "not persisted".
        new_status = 0 if session_row['persist'] else 1
        conn.execute('UPDATE sessions SET persist = ? WHERE id = ?', (new_status, session_id))

        # NOTE(review): only the first matching PDF row is updated, as before;
        # confirm whether every PDF of the session should follow the flag.
        pdf_res = conn.execute(
            'SELECT id FROM generated_pdfs WHERE session_id = ?', (session_id,)
        ).fetchone()
        if pdf_res:
            conn.execute('UPDATE generated_pdfs SET persist = ? WHERE id = ?', (new_status, pdf_res['id']))

        conn.commit()
        return jsonify({'success': True, 'status': 'persisted' if new_status == 1 else 'not_persisted'})
    except Exception as e:
        print(f"Error in toggle_persist: {e}")
        if conn is not None:
            conn.rollback()
        return jsonify({'error': str(e)}), 500
    finally:
        if conn is not None:
            conn.close()
@main_bp.route(ROUTE_SAVE_QUESTIONS, methods=[METHOD_POST])
@login_required
def save_questions():
    """Replace all questions of a session and update the session metadata.

    Expected JSON body:
        session_id: id of a session owned by the current user (required).
        questions: list of dicts, each with ``image_id``, ``question_number``
            and ``status`` (required) plus optional ``marked_solution``,
            ``actual_solution`` and ``time_taken``.
        pdf_subject, pdf_tags, pdf_notes: optional session metadata.

    Returns:
        JSON success message, ``400`` for a malformed body, ``403`` when the
        session is missing or not owned by the caller, ``500`` on a database
        error (the transaction is rolled back).
    """
    data = request.get_json(silent=True) or {}
    session_id = data.get('session_id')
    questions = data.get('questions')
    if not session_id or not isinstance(questions, list):
        return jsonify({'error': 'session_id and a list of questions are required'}), 400

    pdf_subject = data.get('pdf_subject', '')
    pdf_tags = data.get('pdf_tags', '')
    pdf_notes = data.get('pdf_notes', '')

    # Build the rows before touching the database so that a malformed entry
    # is rejected without deleting the existing questions.
    try:
        questions_to_insert = [
            (
                session_id,
                q['image_id'],
                q['question_number'],
                "",  # subject column in questions table - can be removed later
                q['status'],
                q.get('marked_solution', ""),
                q.get('actual_solution', ""),
                q.get('time_taken', ""),
                pdf_tags,  # Save tags with each question too
            )
            for q in questions
        ]
    except (KeyError, TypeError, AttributeError) as e:
        return jsonify({'error': f'Malformed question entry: {e}'}), 400

    conn = None
    try:
        conn = get_db_connection()
        # Security: Check ownership of the session
        session_owner = conn.execute('SELECT user_id FROM sessions WHERE id = ?', (session_id,)).fetchone()
        if not session_owner or session_owner['user_id'] != current_user.id:
            return jsonify({'error': 'Unauthorized'}), 403

        # Update session metadata
        conn.execute(
            'UPDATE sessions SET subject = ?, tags = ?, notes = ? WHERE id = ?',
            (pdf_subject, pdf_tags, pdf_notes, session_id)
        )

        # Delete and re-insert questions inside the same transaction.
        conn.execute('DELETE FROM questions WHERE session_id = ?', (session_id,))
        conn.executemany(
            'INSERT INTO questions (session_id, image_id, question_number, subject, status, marked_solution, actual_solution, time_taken, tags) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)',
            questions_to_insert
        )
        conn.commit()
        return jsonify({'success': True, 'message': 'Questions saved successfully.'})
    except Exception as e:
        if conn is not None:
            conn.rollback()
        return jsonify({'error': str(e)}), 500
    finally:
        if conn is not None:
            conn.close()
@main_bp.route(ROUTE_EXTRACT_ALL_QUESTION_NUMBERS, methods=[METHOD_POST])
@login_required
def extract_all_question_numbers():
    """Run OCR on every cropped image of a session and return question numbers.

    Expected JSON body: ``{'session_id': ...}`` for a session owned by the
    current user.

    Returns:
        JSON ``{'success': True, 'results': [...], 'errors': [...]}`` where
        each result holds ``image_id`` and ``question_number`` and each error
        holds ``image_id`` and ``error``. ``400`` when the NIM feature is
        unavailable or ``session_id`` is missing, ``403`` when the session is
        not owned by the caller, ``404`` when it has no cropped images, ``500``
        on unexpected failure.
    """
    # Imported once here rather than inside the loop below.
    import time

    if not NVIDIA_NIM_AVAILABLE:
        return jsonify({'error': 'NVIDIA NIM feature is not available.'}), 400

    data = request.get_json(silent=True) or {}
    session_id = data.get('session_id')

    if not session_id:
        return jsonify({'error': 'Missing session_id parameter'}), 400

    try:
        conn = get_db_connection()
        try:
            # Security: Check ownership of the session
            session_owner = conn.execute('SELECT user_id FROM sessions WHERE id = ?', (session_id,)).fetchone()
            if not session_owner or session_owner['user_id'] != current_user.id:
                return jsonify({'error': 'Unauthorized'}), 403

            images = conn.execute(
                "SELECT id, processed_filename FROM images WHERE session_id = ? AND image_type = 'cropped' ORDER BY id",
                (session_id,)
            ).fetchall()
        finally:
            # The database is not needed during the OCR calls.
            conn.close()

        if not images:
            return jsonify({'error': 'No cropped images found in session'}), 404

        results = []
        errors = []

        # Pause for a second after every batch of this many successful OCR
        # calls (the calls themselves are made sequentially).
        MAX_CONCURRENT_REQUESTS = 5
        processed_count = 0
        processed_folder = current_app.config['PROCESSED_FOLDER']

        for image in images:
            if processed_count >= MAX_CONCURRENT_REQUESTS:
                time.sleep(1)
                processed_count = 0

            image_id = image['id']
            try:
                processed_filename = image['processed_filename']
                if not processed_filename:
                    errors.append({'image_id': image_id, 'error': 'Image not processed'})
                    continue

                image_path = os.path.join(processed_folder, processed_filename)
                if not os.path.exists(image_path):
                    errors.append({'image_id': image_id, 'error': 'Image file not found on disk'})
                    continue

                image_bytes = resize_image_if_needed(image_path)
                ocr_result = call_nim_ocr_api(image_bytes)
                question_number = extract_question_number_from_ocr_result(ocr_result)

                results.append({
                    'image_id': image_id,
                    'question_number': question_number
                })
                processed_count += 1

            except Exception as e:
                # One failing image must not abort the whole batch.
                errors.append({'image_id': image_id, 'error': str(e)})

        return jsonify({
            'success': True,
            'results': results,
            'errors': errors
        })

    except Exception as e:
        return jsonify({'error': f'Failed to extract question numbers: {str(e)}'}), 500
@main_bp.route('/get_metadata_suggestions')
@login_required
def get_metadata_suggestions():
    """Return the distinct subjects and tags used on the caller's PDFs.

    Tags are stored as comma-separated strings in ``generated_pdfs.tags``;
    they are split, stripped and de-duplicated here. Blank fragments (from
    values such as ``"a,,b"`` or a trailing comma) are dropped so they do not
    appear as empty suggestions.

    Returns:
        JSON ``{'subjects': [...], 'tags': [...]}``, both sorted.
    """
    conn = get_db_connection()
    try:
        subject_rows = conn.execute(
            'SELECT DISTINCT subject FROM generated_pdfs WHERE subject IS NOT NULL AND user_id = ?',
            (current_user.id,)
        ).fetchall()
        tag_rows = conn.execute(
            'SELECT DISTINCT tags FROM generated_pdfs WHERE tags IS NOT NULL AND tags != \'\' AND user_id = ?',
            (current_user.id,)
        ).fetchall()
    finally:
        # Close the connection even if one of the queries raises.
        conn.close()

    subjects = [row['subject'] for row in subject_rows]
    all_tags = {
        tag
        for row in tag_rows
        for tag in (part.strip() for part in row['tags'].split(','))
        if tag
    }
    return jsonify({
        'subjects': sorted(subjects),
        'tags': sorted(all_tags)
    })
ORDER BY i.id + """ + all_questions = [dict(row) for row in conn.execute(query, (session_id,)).fetchall()] + conn.close() + + miscellaneous_questions = data.get('miscellaneous_questions', []) + all_questions.extend(miscellaneous_questions) + + filter_type = data.get('filter_type', 'all') + filtered_questions = [ + q for q in all_questions if filter_type == 'all' or q['status'] == filter_type + ] + + if not filtered_questions: + return jsonify({'error': 'No questions match the filter criteria'}), 400 + + # For preview, we only need the first page + images_per_page = int(data.get('images_per_page', 4)) + preview_questions = filtered_questions[:images_per_page] + + practice_mode = data.get('practice_mode', 'none') + practice_mode_settings = { + 'portrait_2': {'images_per_page': 2, 'orientation': 'portrait', 'grid_rows': 2, 'grid_cols': 1}, + 'portrait_3': {'images_per_page': 3, 'orientation': 'portrait', 'grid_rows': 3, 'grid_cols': 1}, + 'landscape_2': {'images_per_page': 2, 'orientation': 'landscape', 'grid_rows': 2, 'grid_cols': 1}, + 'portrait_2_spacious': {'images_per_page': 2, 'orientation': 'portrait', 'grid_rows': 2, 'grid_cols': 1} + } + + if practice_mode in practice_mode_settings: + settings = practice_mode_settings[practice_mode] + images_per_page = settings['images_per_page'] + orientation = settings['orientation'] + grid_rows = settings['grid_rows'] + grid_cols = settings['grid_cols'] + else: + images_per_page = int(data.get('images_per_page', 4)) + orientation = data.get('orientation', 'portrait') + grid_rows = int(data.get('grid_rows')) if data.get('grid_rows') else None + grid_cols = int(data.get('grid_cols')) if data.get('grid_cols') else None + + font_size_scale = float(data.get('font_size_scale', 1.0)) + + pdf_bytes = create_a4_pdf_from_images( + preview_questions, + current_app.config['PROCESSED_FOLDER'], + output_filename=None, + images_per_page=images_per_page, + output_folder=None, + orientation=orientation, + grid_rows=grid_rows, + 
grid_cols=grid_cols, + practice_mode=practice_mode, + return_bytes=True, + font_size_scale=font_size_scale + ) + + if pdf_bytes: + # Convert PDF bytes to image for preview + try: + pdf_document = fitz.open(stream=pdf_bytes, filetype="pdf") + first_page = pdf_document.load_page(0) + pix = first_page.get_pixmap(dpi=150) # Lower DPI for faster preview + img_bytes = pix.tobytes("png") # Use tobytes() instead of save() + + img_base64 = base64.b64encode(img_bytes).decode('utf-8') + + return jsonify({'success': True, 'preview_image': f'data:image/png;base64,{img_base64}'}) + + except Exception as e: + return jsonify({'error': f'Failed to generate preview image: {str(e)}'}), 500 + finally: + if 'pdf_document' in locals() and pdf_document: + pdf_document.close() + else: + return jsonify({'error': 'PDF generation for preview failed'}), 500 + +@main_bp.route(ROUTE_GENERATE_PDF, methods=[METHOD_POST]) +@login_required +def generate_pdf(): + data = request.json + session_id = data['session_id'] + + conn = get_db_connection() + # Security: Check ownership of the session + session_owner = conn.execute('SELECT user_id FROM sessions WHERE id = ?', (session_id,)).fetchone() + if not session_owner or session_owner['user_id'] != current_user.id: + conn.close() + return jsonify({'error': 'Unauthorized'}), 403 + + query = """ + SELECT q.*, i.filename, i.processed_filename FROM questions q + JOIN images i ON q.image_id = i.id + WHERE q.session_id = ? 
ORDER BY i.id + """ + all_questions = [dict(row) for row in conn.execute(query, (session_id,)).fetchall()] + + miscellaneous_questions = data.get('miscellaneous_questions', []) + all_questions.extend(miscellaneous_questions) + + filter_type = data.get('filter_type', 'all') + filtered_questions = [ + q for q in all_questions if filter_type == 'all' or q['status'] == filter_type + ] + + if not filtered_questions: + conn.close() + return jsonify({'error': 'No questions match the filter criteria'}), 400 + + pdf_filename = f"{secure_filename(data.get('pdf_name', 'analysis'))}_{datetime.now().strftime('%Y%m%d_%H%M')}.pdf" + + practice_mode = data.get('practice_mode', 'none') + practice_mode_settings = { + 'portrait_2': {'images_per_page': 2, 'orientation': 'portrait', 'grid_rows': 2, 'grid_cols': 1}, + 'portrait_3': {'images_per_page': 3, 'orientation': 'portrait', 'grid_rows': 3, 'grid_cols': 1}, + 'landscape_2': {'images_per_page': 2, 'orientation': 'landscape', 'grid_rows': 2, 'grid_cols': 1}, + 'portrait_2_spacious': {'images_per_page': 2, 'orientation': 'portrait', 'grid_rows': 2, 'grid_cols': 1} + } + + if practice_mode in practice_mode_settings: + settings = practice_mode_settings[practice_mode] + images_per_page = settings['images_per_page'] + orientation = settings['orientation'] + grid_rows = settings['grid_rows'] + grid_cols = settings['grid_cols'] + else: + images_per_page = int(data.get('images_per_page', 4)) + orientation = data.get('orientation', 'portrait') + grid_rows = int(data.get('grid_rows')) if data.get('grid_rows') else None + grid_cols = int(data.get('grid_cols')) if data.get('grid_cols') else None + + font_size_scale = float(data.get('font_size_scale', 1.0)) + + if create_a4_pdf_from_images(filtered_questions, current_app.config['PROCESSED_FOLDER'], pdf_filename, images_per_page, current_app.config['OUTPUT_FOLDER'], orientation, grid_rows, grid_cols, practice_mode, font_size_scale=font_size_scale): + session_info = conn.execute('SELECT 
# NOTE(review): the three views below take ``filename`` but are not protected
# by @login_required and do not check which user owns the PDF. Confirm whether
# that is intentional (e.g. shareable links) before tightening access.
# NOTE(review): the URL variable is required by the view signatures; the
# ``path`` converter is an assumption - confirm against the original routes.
@main_bp.route('/download/<path:filename>')
def download_file(filename):
    """Send a generated PDF from ``OUTPUT_FOLDER`` as an attachment.

    ``send_from_directory`` is used instead of joining the path by hand so a
    crafted ``filename`` cannot resolve outside the output folder; such a
    request ends in a 404 response.
    """
    return send_from_directory(current_app.config['OUTPUT_FOLDER'], filename, as_attachment=True)


@main_bp.route('/view_pdf/<path:filename>')
def view_pdf(filename):
    """Send a generated PDF from ``OUTPUT_FOLDER`` for inline display."""
    return send_from_directory(current_app.config['OUTPUT_FOLDER'], filename, as_attachment=False)


@main_bp.route('/view_pdf_v2/<path:filename>')
def view_pdf_v2(filename):
    """Render the PDF.js viewer page pointing at the ``view_pdf`` URL."""
    return render_template('pdfjs_viewer.html', pdf_url=url_for('main.view_pdf', filename=filename), pdf_title=filename)
# NOTE(review): URL variables are required by the view signature; ``path`` is
# used for ``filename`` because the body handles names containing '/'.
@main_bp.route('/image/<folder>/<path:filename>')
def serve_image(folder, filename):
    """Serve an image from one of the configured storage folders.

    Args:
        folder: Logical folder name. ``uploads``, ``processed`` and ``output``
            map to their config keys; any other value is looked up as
            ``<FOLDER>_FOLDER`` in the app config.
        filename: Either ``session_id/image.jpg`` (new layout) or a bare
            ``image.jpg`` (old layout), relative to the folder.

    Returns:
        The file, ``403`` when the resolved path leaves the folder, or ``404``
        when the folder is not configured or the file does not exist. If the
        nested path is missing, the bare file name is tried in the folder root
        as a fallback for the old layout.
    """
    current_app.logger.info(f"Attempting to serve image from folder '{folder}' with filename '{filename}'")

    folder_map = {
        'uploads': 'UPLOAD_FOLDER',
        'processed': 'PROCESSED_FOLDER',
        'output': 'OUTPUT_FOLDER'
    }

    config_key = folder_map.get(folder) or f'{folder.upper()}_FOLDER'
    base_folder_path = current_app.config.get(config_key)

    if not base_folder_path:
        current_app.logger.error(f"Configuration key '{config_key}' not found.")
        return "Not found", 404

    base_abs_path = os.path.abspath(base_folder_path)
    full_path = os.path.abspath(os.path.join(base_abs_path, filename))
    current_app.logger.info(f"Checking for file at primary path: {full_path}")

    # Security check: compare whole path components. A plain string prefix
    # test would accept a sibling directory such as "<base>_other".
    try:
        inside_base = os.path.commonpath([base_abs_path, full_path]) == base_abs_path
    except ValueError:
        # Raised e.g. for paths on different drives; treat as outside.
        inside_base = False
    if not inside_base:
        current_app.logger.warning(f"Potential directory traversal attempt: {filename}")
        return "Forbidden", 403

    if os.path.isfile(full_path):
        current_app.logger.info(f"File found at primary path. Serving {filename} from {base_folder_path}")
        return send_from_directory(base_folder_path, filename)

    # Fallback for old structure: 'session_id/q_1.png' may actually live at
    # '<folder>/q_1.png', so retry with only the last path component.
    fallback_filename = filename.split('/')[-1]
    if fallback_filename and fallback_filename != filename:
        fallback_full_path = os.path.abspath(os.path.join(base_abs_path, fallback_filename))
        current_app.logger.info(f"Primary path not found. Checking fallback path: {fallback_full_path}")
        if os.path.isfile(fallback_full_path):
            current_app.logger.info(f"File found at fallback path. Serving {fallback_filename} from {base_folder_path}")
            return send_from_directory(base_folder_path, fallback_filename)

    current_app.logger.error(f"File not found at primary or fallback paths for filename: {filename}")
    return "Not found", 404
+ descendant_ids = get_all_descendant_folder_ids(conn, folder_id) + all_folder_ids = [folder_id] + descendant_ids + if all_folder_ids: + placeholders = ', '.join('?' * len(all_folder_ids)) + where_clauses.append(f'folder_id IN ({placeholders})') + query_params.extend(all_folder_ids) + else: + if folder_id: + where_clauses.append('folder_id = ?') + query_params.append(folder_id) + else: + where_clauses.append('folder_id IS NULL') + + if folder_id: + subfolders = conn.execute('SELECT * FROM folders WHERE parent_id = ? AND user_id = ? ORDER BY name', (folder_id, current_user.id)).fetchall() + else: + subfolders = conn.execute('SELECT * FROM folders WHERE parent_id IS NULL AND user_id = ? ORDER BY name', (current_user.id,)).fetchall() + + if where_clauses: + base_query += ' AND ' + ' AND '.join(where_clauses) + + base_query += ' ORDER BY created_at DESC' + + pdfs = conn.execute(base_query, query_params).fetchall() + + pdfs_list = [dict(row) for row in pdfs] + subfolders_list = [dict(row) for row in subfolders] + + for pdf in pdfs_list: + if isinstance(pdf['created_at'], str): + try: + pdf['created_at'] = datetime.strptime(pdf['created_at'], '%Y-%m-%d %H:%M:%S') + except ValueError: + pass + + for folder in subfolders_list: + if isinstance(folder['created_at'], str): + try: + folder['created_at'] = datetime.strptime(folder['created_at'], '%Y-%m-%d %H:%M:%S') + except ValueError: + pass + + # get_folder_tree also needs to be user-aware + folder_tree = get_folder_tree(user_id=current_user.id) + conn.close() + + return render_template('pdf_manager.html', + pdfs=pdfs_list, + subfolders=subfolders_list, + current_folder_id=folder_id, + breadcrumbs=breadcrumbs, + all_view=all_view, + folder_tree=folder_tree, + search_query=search_query, + recursive=is_recursive) + +@main_bp.route('/get_pdf_details/') +@login_required +def get_pdf_details(pdf_id): + conn = get_db_connection() + pdf = conn.execute('SELECT * FROM generated_pdfs WHERE id = ? 
AND user_id = ?', (pdf_id, current_user.id)).fetchone() + conn.close() + if pdf: + return jsonify(dict(pdf)) + return jsonify({'error': 'PDF not found'}), 404 + +@main_bp.route('/update_pdf_details/', methods=[METHOD_POST]) +@login_required +def update_pdf_details(pdf_id): + data = request.json + try: + conn = get_db_connection() + pdf_owner = conn.execute('SELECT user_id FROM generated_pdfs WHERE id = ?', (pdf_id,)).fetchone() + if not pdf_owner or pdf_owner['user_id'] != current_user.id: + conn.close() + return jsonify({'error': 'Unauthorized'}), 403 + + conn.execute( + 'UPDATE generated_pdfs SET subject = ?, tags = ?, notes = ? WHERE id = ?', + (data.get('subject'), data.get('tags'), data.get('notes'), pdf_id) + ) + conn.commit() + conn.close() + return jsonify({'success': True}) + except Exception as e: + return jsonify({'error': str(e)}), 500 + +@main_bp.route('/rename_item', methods=[METHOD_POST]) +@login_required +def rename_item(): + data = request.json + item_type, item_id, new_name = data.get('item_type'), data.get('item_id'), data.get('new_name') + + if not all([item_type, item_id, new_name]): + return jsonify({'error': 'Missing parameters'}), 400 + + conn = get_db_connection() + if item_type == 'folder': + folder_owner = conn.execute('SELECT user_id FROM folders WHERE id = ?', (item_id,)).fetchone() + if not folder_owner or folder_owner['user_id'] != current_user.id: + conn.close(); return jsonify({'error': 'Unauthorized'}), 403 + conn.execute('UPDATE folders SET name = ? 
# NOTE(review): the URL variable is required by the view signature; the int
# converter is an assumption based on folder ids coming from ``lastrowid``.
@main_bp.route('/delete_folder/<int:folder_id>', methods=[METHOD_DELETE])
@login_required
def delete_folder(folder_id):
    """Delete a folder, all of its descendant folders and the PDFs in them.

    Args:
        folder_id: Primary key of a ``folders`` row owned by the current user.

    Returns:
        JSON ``{'success': True}``, ``403`` when the folder is missing or owned
        by another user, ``500`` with the error text when the database
        operation fails (the transaction is rolled back and no file is
        removed).
    """
    conn = get_db_connection()
    try:
        folder_owner = conn.execute('SELECT user_id FROM folders WHERE id = ?', (folder_id,)).fetchone()
        if not folder_owner or folder_owner['user_id'] != current_user.id:
            return jsonify({'error': 'Unauthorized'}), 403

        folder_ids_to_delete = [folder_id] + get_all_descendant_folder_ids(conn, folder_id, current_user.id)
        # Kept under its own name: the PDF delete below needs a different
        # placeholder list, and reusing one variable for both breaks the
        # folders DELETE.
        folder_placeholders = ', '.join('?' * len(folder_ids_to_delete))

        pdfs_to_delete = conn.execute(
            f'SELECT id, filename FROM generated_pdfs WHERE folder_id IN ({folder_placeholders}) AND user_id = ?',
            (*folder_ids_to_delete, current_user.id)
        ).fetchall()

        pdf_ids = [p['id'] for p in pdfs_to_delete]
        if pdf_ids:
            pdf_placeholders = ', '.join('?' * len(pdf_ids))
            conn.execute(f'DELETE FROM generated_pdfs WHERE id IN ({pdf_placeholders})', pdf_ids)

        conn.execute(
            f'DELETE FROM folders WHERE id IN ({folder_placeholders}) AND user_id = ?',
            (*folder_ids_to_delete, current_user.id)
        )
        conn.commit()
    except Exception as e:
        conn.rollback()
        return jsonify({'error': str(e)}), 500
    finally:
        conn.close()

    # Remove files only after the rows are gone for good, so a failed
    # transaction never leaves rows pointing at deleted files.
    output_folder = current_app.config['OUTPUT_FOLDER']
    for pdf in pdfs_to_delete:
        try:
            os.remove(os.path.join(output_folder, pdf['filename']))
        except OSError:
            # Best effort: the file may already be missing.
            pass

    return jsonify({'success': True})
@main_bp.route('/bulk_toggle_persist', methods=[METHOD_POST])
@login_required
def bulk_toggle_persist():
    """Flip the ``persist`` flag on several generated PDFs of the current user.

    Expected JSON body: ``{'ids': [pdf_id, ...]}``. Ids that do not belong to
    the caller are ignored. For each PDF that references a session, the same
    new flag value is written to that session if the caller owns it.

    Returns:
        JSON ``{'success': True}``, ``400`` when no id list is supplied,
        ``500`` with the error text on failure (the transaction is rolled
        back).
    """
    data = request.get_json(silent=True) or {}
    pdf_ids = data.get('ids', [])
    # A non-list value (e.g. a string) would otherwise be expanded character
    # by character into SQL parameters.
    if not pdf_ids or not isinstance(pdf_ids, list):
        return jsonify({'error': 'No PDF IDs provided'}), 400

    # Initialise first so the error handler works even when opening the
    # connection fails.
    conn = None
    try:
        conn = get_db_connection()
        placeholders = ','.join('?' for _ in pdf_ids)
        owned_pdfs = conn.execute(
            f'SELECT id, persist, session_id FROM generated_pdfs WHERE id IN ({placeholders}) AND user_id = ?',
            (*pdf_ids, current_user.id)
        ).fetchall()

        for pdf in owned_pdfs:
            # A NULL flag is treated as "not persisted".
            new_status = 0 if pdf['persist'] else 1
            session_id = pdf['session_id']
            conn.execute('UPDATE generated_pdfs SET persist = ? WHERE id = ?', (new_status, pdf['id']))
            if session_id:
                conn.execute(
                    'UPDATE sessions SET persist = ? WHERE id = ? AND user_id = ?',
                    (new_status, session_id, current_user.id)
                )

        conn.commit()
        return jsonify({'success': True})
    except Exception as e:
        print(f"Error in bulk_toggle_persist: {e}")
        if conn is not None:
            conn.rollback()
        return jsonify({'error': str(e)}), 500
    finally:
        if conn is not None:
            conn.close()
for _ in pdf_ids) + owned_pdfs = conn.execute(f'SELECT filename FROM generated_pdfs WHERE id IN ({placeholders}) AND user_id = ?', (*pdf_ids, current_user.id)).fetchall() + for pdf_info in owned_pdfs: + pdf_path = os.path.join(current_app.config['OUTPUT_FOLDER'], pdf_info['filename']) + if os.path.exists(pdf_path): zf.write(pdf_path, os.path.basename(pdf_path)) + conn.close() + except Exception as e: + return jsonify({'error': str(e)}), 500 + + memory_file.seek(0) + + return send_file(memory_file, mimetype='application/zip', as_attachment=True, download_name='pdfs.zip') + +@main_bp.route('/create_folder', methods=[METHOD_POST]) +@login_required +def create_folder(): + data = request.json + name, parent_id = data.get('new_folder_name'), data.get('parent_id') + if not name: return jsonify({'error': 'Folder name is required'}), 400 + + try: + conn = get_db_connection() + cursor = conn.cursor() + cursor.execute("INSERT INTO folders (name, parent_id, user_id) VALUES (?, ?, ?)", (name, parent_id, current_user.id)) + new_folder_id = cursor.lastrowid + conn.commit() + conn.close() + return jsonify({'success': True, 'id': new_folder_id, 'name': name, 'parent_id': parent_id}) + except Exception as e: + return jsonify({'error': str(e)}), 500 + +@main_bp.route('/bulk_move_pdfs', methods=[METHOD_POST]) +@login_required +def bulk_move_pdfs(): + data = request.json + pdf_ids, target_folder_id = data.get('ids', []), data.get('target_folder_id') + if not pdf_ids: return jsonify({'error': 'No PDF IDs provided'}), 400 + + try: + conn = get_db_connection() + if target_folder_id: + folder_owner = conn.execute('SELECT user_id FROM folders WHERE id = ?', (target_folder_id,)).fetchone() + if not folder_owner or folder_owner['user_id'] != current_user.id: + conn.close(); return jsonify({'error': 'Unauthorized target folder'}), 403 + + placeholders = ', '.join('?' * len(pdf_ids)) + conn.execute(f'UPDATE generated_pdfs SET folder_id = ? 
WHERE id IN ({placeholders}) AND user_id = ?', (target_folder_id, *pdf_ids, current_user.id)) + conn.commit() + conn.close() + return jsonify({'success': True}) + except Exception as e: + return jsonify({'error': str(e)}), 500 + +@main_bp.route('/merge_pdfs', methods=[METHOD_POST]) +@login_required +def merge_pdfs(): + data = request.json + pdf_ids = data.get('pdf_ids', []) + if len(pdf_ids) < 2: return jsonify({'error': 'Please select at least two PDFs to merge.'}), 400 + + try: + conn = get_db_connection() + safe_pdf_ids = [int(pid) for pid in pdf_ids] + placeholders = ', '.join('?' * len(safe_pdf_ids)) + query = f"SELECT filename FROM generated_pdfs WHERE id IN ({placeholders}) AND user_id = ?" + pdfs_to_merge = conn.execute(query, (*safe_pdf_ids, current_user.id)).fetchall() + + if len(pdfs_to_merge) != len(safe_pdf_ids): + conn.close(); return jsonify({'error': 'One or more selected PDFs not found or are unauthorized.'}), 404 + + merged_doc = fitz.open() + source_filenames = [] + for pdf_row in pdfs_to_merge: + filename = pdf_row['filename'] + source_filenames.append(filename) + pdf_path = os.path.join(current_app.config['OUTPUT_FOLDER'], filename) + if os.path.exists(pdf_path): + doc_to_merge = fitz.open(pdf_path) + merged_doc.insert_pdf(doc_to_merge) + doc_to_merge.close() + + new_filename = f"merged_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf" + merged_doc.save(os.path.join(current_app.config['OUTPUT_FOLDER'], new_filename)) + merged_doc.close() + + session_id = str(uuid.uuid4()) + conn.execute('INSERT INTO sessions (id, original_filename, user_id) VALUES (?, ?, ?)', (session_id, f"Merged from {len(source_filenames)} files", current_user.id)) + + subject = "Merged Document" + notes = f"This document was created by merging the following files:\n" + "\n".join(source_filenames) + + conn.execute( + 'INSERT INTO generated_pdfs (session_id, filename, subject, tags, notes, source_filename, user_id) VALUES (?, ?, ?, ?, ?, ?, ?)', + (session_id, new_filename, 
subject, 'merged', notes, ", ".join(source_filenames), current_user.id) + ) + conn.commit() + conn.close() + return jsonify({'success': True, 'new_filename': new_filename}) + + except Exception as e: + if 'conn' in locals() and conn: conn.close() + print(f"Error merging PDFs: {e}") + return jsonify({'error': str(e)}), 500 + +@main_bp.route('/upload_final_pdf') +@login_required +def upload_final_pdf(): + return render_template('upload_final_pdf.html') + + +@main_bp.route('/handle_final_pdf_upload', methods=[METHOD_POST]) +@login_required +def handle_final_pdf_upload(): + subject = request.form.get('subject') + if not subject: return 'Subject is required', 400 + + tags, notes = request.form.get('tags'), request.form.get('notes') + conn = get_db_connection() + + def process_and_save_pdf(file_content, original_filename): + session_id = str(uuid.uuid4()) + # Associate session with user + conn.execute('INSERT INTO sessions (id, original_filename, user_id) VALUES (?, ?, ?)', + (session_id, original_filename, current_user.id)) + + secure_name = secure_filename(original_filename) + output_filename = f"{session_id}_{secure_name}" + output_path = os.path.join(current_app.config['OUTPUT_FOLDER'], output_filename) + + with open(output_path, 'wb') as f: + f.write(file_content) + + # Associate generated PDF with user + conn.execute( + 'INSERT INTO generated_pdfs (session_id, filename, subject, tags, notes, source_filename, user_id) VALUES (?, ?, ?, ?, ?, ?, ?)', + (session_id, output_filename, subject, tags, notes, original_filename, current_user.id) + ) + conn.commit() + + try: + if 'pdf' in request.files and request.files['pdf'].filename: + file = request.files['pdf'] + if file and file.filename.lower().endswith('.pdf'): + process_and_save_pdf(file.read(), file.filename) + else: + return 'Invalid file type', 400 + + elif 'pdf_url' in request.form and request.form['pdf_url']: + pdf_url = request.form['pdf_url'] + + # Handle Google Drive URLs + pdf_url = 
convert_google_drive_url(pdf_url) + + response = requests.get(pdf_url, allow_redirects=True) + response.raise_for_status() + + original_filename = None + # Try to get filename from Content-Disposition header + if 'Content-Disposition' in response.headers: + cd = response.headers['Content-Disposition'] + fname_match = re.search(r'filename="?([^";]+)"?', cd) + if fname_match: + original_filename = fname_match.group(1) + + if not original_filename: + original_filename = os.path.basename(urlparse(pdf_url).path) + + if not original_filename or not original_filename.lower().endswith('.pdf'): + original_filename = 'downloaded_document.pdf' + + process_and_save_pdf(response.content, original_filename) + + elif 'curl_command' in request.form and request.form['curl_command']: + curl_input = request.form['curl_command'].strip().replace('\n', ',') + curl_commands = [cmd.strip() for cmd in curl_input.split(',') if cmd.strip()] + + for command in curl_commands: + url, filename = _parse_curl_command(command) + if not url or not filename: + current_app.logger.warning(f"Could not parse cURL command: {command}") + continue + + # Handle Google Drive URLs in cURL + url = convert_google_drive_url(url) + + response = requests.get(url, allow_redirects=True) + response.raise_for_status() + process_and_save_pdf(response.content, filename) + + else: + return 'No PDF file, URL, or cURL command provided', 400 + + except requests.RequestException as e: + current_app.logger.error(f"Failed to download PDF from URL: {e}") + return f"Failed to download PDF: {e}", 500 + except Exception as e: + current_app.logger.error(f"An error occurred during PDF upload: {e}") + return "An internal error occurred.", 500 + finally: + conn.close() + + return redirect(url_for('main.pdf_manager')) + +@main_bp.route('/resize/', defaults={'folder_path': ''}, methods=['GET', 'POST']) +@main_bp.route('/resize/browse/', methods=['GET', 'POST']) +@login_required +def resize_pdf_route(folder_path): + if request.method == 
'POST': + input_pdf_name, output_pdf_name = request.form.get('input_pdf'), request.form.get('output_pdf') + bg_color_hex, pattern = request.form.get('bg_color', '#FFFFFF'), request.form.get('pattern') + pattern_color_hex = request.form.get('pattern_color', '#CCCCCC') + mode, stitch_direction = request.form.get('mode', 'notes_only'), request.form.get('stitch_direction', 'horizontal') + add_space = 'add_space' in request.form + + if not input_pdf_name or not output_pdf_name: return "Missing input or output PDF name", 400 + + conn = get_db_connection() + pdf_owner = conn.execute('SELECT user_id FROM generated_pdfs WHERE filename = ?', (input_pdf_name,)).fetchone() + if not pdf_owner or pdf_owner['user_id'] != current_user.id: + conn.close() + return "Unauthorized", 403 + + def hex_to_rgb(h): + h = h.lstrip('#') + return tuple(int(h[i:i+2], 16) / 255.0 for i in (0, 2, 4)) + + input_pdf_path = os.path.join(current_app.config['OUTPUT_FOLDER'], input_pdf_name) + output_pdf_path = os.path.join(current_app.config['OUTPUT_FOLDER'], output_pdf_name) + bg_color, pattern_color = hex_to_rgb(bg_color_hex), hex_to_rgb(pattern_color_hex) + + try: + expand_pdf_for_notes( + input_pdf_path, output_pdf_path, bg_color=bg_color, mode=mode, + stitch_direction=stitch_direction, add_space=add_space, + pattern=pattern, pattern_color=pattern_color + ) + + session_id = str(uuid.uuid4()) + conn.execute('INSERT INTO sessions (id, original_filename, user_id) VALUES (?, ?, ?)', (session_id, f"Resized from {input_pdf_name}", current_user.id)) + + subject = f"Resized - {os.path.basename(input_pdf_name)}" + notes = f"Resized with options: mode={mode}, stitch_direction={stitch_direction}, add_space={add_space}, bg_color={bg_color_hex}, pattern={pattern}" + + conn.execute( + 'INSERT INTO generated_pdfs (session_id, filename, subject, tags, notes, source_filename, user_id) VALUES (?, ?, ?, ?, ?, ?, ?)', + (session_id, output_pdf_name, subject, 'resized', notes, input_pdf_name, current_user.id) + ) + 
conn.commit() + conn.close() + + return redirect(url_for('main.pdf_manager')) + except Exception as e: + conn.close() + return f"Error during resizing or database update: {e}", 500 + + else: # GET request + conn = get_db_connection() + search_query, is_recursive = request.args.get('search', ''), request.args.get('recursive') == 'true' + query_params, where_clauses = [current_user.id], ['user_id = ?'] + + if search_query: + where_clauses.append('(filename LIKE ? OR subject LIKE ? OR tags LIKE ?)') + search_term = f'%{search_query}%' + query_params.extend([search_term, search_term, search_term]) + + folder_id, subfolders, breadcrumbs = None, [], [] + + if folder_path: + parts = folder_path.split('/') + parent_id = None + for i, part in enumerate(parts): + res = conn.execute("SELECT id FROM folders WHERE name = ? AND user_id = ? AND (parent_id = ? OR (? IS NULL AND parent_id IS NULL))", (part, current_user.id, parent_id, parent_id)).fetchone() + if not res: return redirect(url_for('main.resize_pdf_route')) + parent_id = res['id'] + breadcrumbs.append({'name': part, 'path': '/'.join(parts[:i+1])}) + folder_id = parent_id + + if is_recursive and search_query: + if folder_id: + descendant_ids = get_all_descendant_folder_ids(conn, folder_id, current_user.id) + all_folder_ids = [folder_id] + descendant_ids + if all_folder_ids: + placeholders = ', '.join('?' * len(all_folder_ids)) + where_clauses.append(f'folder_id IN ({placeholders})') + query_params.extend(all_folder_ids) + else: + if folder_id: where_clauses.append('folder_id = ?'); query_params.append(folder_id) + else: where_clauses.append('folder_id IS NULL') + + if folder_id: + subfolders = conn.execute('SELECT * FROM folders WHERE parent_id = ? AND user_id = ? ORDER BY name', (folder_id, current_user.id)).fetchall() + else: + subfolders = conn.execute('SELECT * FROM folders WHERE parent_id IS NULL AND user_id = ? 
ORDER BY name', (current_user.id,)).fetchall() + + base_query = 'SELECT * FROM generated_pdfs WHERE ' + ' AND '.join(where_clauses) + base_query += ' ORDER BY created_at DESC' + + pdfs = conn.execute(base_query, query_params).fetchall() + folder_tree = get_folder_tree(user_id=current_user.id) + conn.close() + + return render_template('resize.html', pdfs=[dict(row) for row in pdfs], subfolders=[dict(row) for row in subfolders], + current_folder_id=folder_id, breadcrumbs=breadcrumbs, folder_tree=folder_tree, + search_query=search_query, recursive=is_recursive) + +@main_bp.route('/print_pdfs', methods=['POST']) +@login_required +def print_pdfs(): + pdf_ids = request.form.getlist('pdf_ids') + current_app.logger.info(f"User {current_user.id} printing PDFs with IDs: {pdf_ids}") + if not pdf_ids: + return jsonify({'error': 'No PDFs selected'}), 400 + + conn = get_db_connection() + placeholders = ','.join('?' for _ in pdf_ids) + query = f"SELECT filename, subject FROM generated_pdfs WHERE id IN ({placeholders}) AND user_id = ?" 
+ pdfs_info = conn.execute(query, (*pdf_ids, current_user.id)).fetchall() + conn.close() + + current_app.logger.info(f"Found {len(pdfs_info)} owned PDFs to print.") + if not pdfs_info: + return jsonify({'error': 'No valid PDFs found for the given IDs'}), 404 + + merged_pdf = fitz.open() + font_path = "arial.ttf" + + for i, pdf_info in enumerate(pdfs_info): + pdf_path = os.path.join(current_app.config['OUTPUT_FOLDER'], pdf_info['filename']) + if os.path.exists(pdf_path): + try: + doc = fitz.open(pdf_path) + if len(doc) > 0: + first_page = doc[0] + rect = first_page.rect + text = f"Subject: {pdf_info['subject']}\nFilename: {pdf_info['filename']}" + text_rect = fitz.Rect(rect.width * 0.02, 0, rect.width * 0.98, rect.height * 0.05) + first_page.insert_textbox(text_rect, text, fontsize=5, fontname="Arial", fontfile=font_path, color=(0, 0, 0), overlay=True, align=fitz.TEXT_ALIGN_CENTER) + merged_pdf.insert_pdf(doc) + doc.close() + except Exception as e: + current_app.logger.error(f"ERROR processing PDF '{pdf_info['filename']}': {e}") + else: + current_app.logger.warning(f"PDF file not found at '{pdf_path}'") + + timestamp = datetime.now().strftime("%Y%m%d%H%M%S") + temp_filename = f'printed_documents_{timestamp}.pdf' + temp_filepath = os.path.join(current_app.config['TEMP_FOLDER'], temp_filename) + + os.makedirs(current_app.config['TEMP_FOLDER'], exist_ok=True) + merged_pdf.save(temp_filepath) + merged_pdf.close() + + return jsonify({'success': True, 'url': url_for('main.view_generated_pdf', filename=temp_filename)}) + +@main_bp.route('/view_generated_pdf/') +def view_generated_pdf(filename): + """Serves a generated PDF from the temporary folder.""" + safe_filename = secure_filename(filename) + filepath = os.path.join(current_app.config['TEMP_FOLDER'], safe_filename) + if not os.path.exists(filepath): + return "File not found.", 404 + return send_file(filepath, mimetype='application/pdf', as_attachment=False) + +@main_bp.route('/redact_status/') +@login_required +def 
redact_status(session_id): + conn = get_db_connection() + session_owner = conn.execute('SELECT user_id FROM sessions WHERE id = ?', (session_id,)).fetchone() + conn.close() + if not session_owner or session_owner['user_id'] != current_user.id: + return "Unauthorized", 403 + return render_template('redact_status.html', session_id=session_id) + +@main_bp.route('/redaction_stream/') +@login_required +def redaction_stream(session_id): + def generate(): + conn = get_db_connection() + session_owner = conn.execute('SELECT user_id FROM sessions WHERE id = ?', (session_id,)).fetchone() + if not session_owner or session_owner['user_id'] != current_user.id: + conn.close() + yield f"data: {json.dumps({'error': 'Unauthorized'})}\n\n" + return + + if not NVIDIA_NIM_AVAILABLE: + yield f"data: {json.dumps({'error': 'NVIDIA API Key is not configured.'})}\n\n"; return + + images = conn.execute("SELECT id, filename FROM images WHERE session_id = ? AND image_type = 'original' ORDER BY image_index", (session_id,)).fetchall() + + if not images: + conn.close() + yield f"data: {json.dumps({'error': 'No images found for this session.'})}\n\n" + return + + redacted_image_paths, source_filenames_for_notes = [], [] + total_images = len(images) + + try: + for i, image_row in enumerate(images): + progress = int(((i + 1) / total_images) * 100) + yield f"data: {json.dumps({'progress': progress, 'message': f'Redacting page {i + 1} of {total_images}...'})}\n\n" + + original_filename = image_row['filename'] + source_filenames_for_notes.append(original_filename) + original_path = os.path.join(current_app.config['UPLOAD_FOLDER'], original_filename) + + if not os.path.exists(original_path): continue + + redacted_image = redact_pictures_in_image(original_path, NVIDIA_API_KEY) + + processed_filename = f"redacted_{original_filename}" + processed_path = os.path.join(current_app.config['PROCESSED_FOLDER'], processed_filename) + redacted_image.save(processed_path, 'PNG') + 
redacted_image_paths.append(processed_path) + + conn.execute("UPDATE images SET processed_filename = ? WHERE id = ?", (processed_filename, image_row['id'])) + conn.commit() + + yield f"data: {json.dumps({'progress': 100, 'message': 'Assembling final PDF...'})}\n\n" + final_pdf_filename = f"redacted_document_{session_id}.pdf" + final_pdf_path = os.path.join(current_app.config['OUTPUT_FOLDER'], final_pdf_filename) + + if not create_pdf_from_full_images(redacted_image_paths, final_pdf_path): raise Exception("Failed to create the final PDF.") + + session_info = conn.execute('SELECT original_filename FROM sessions WHERE id = ?', (session_id,)).fetchone() + subject = f"Redacted - {session_info['original_filename'] if session_info else 'Document'}" + notes = f"This document was automatically redacted." + + conn.execute( + 'INSERT INTO generated_pdfs (session_id, filename, subject, tags, notes, source_filename, user_id) VALUES (?, ?, ?, ?, ?, ?, ?)', + (session_id, final_pdf_filename, subject, 'redacted', notes, ", ".join(source_filenames_for_notes), current_user.id) + ) + conn.commit() + + download_url = url_for('main.download_file', filename=final_pdf_filename) + yield f"data: {json.dumps({'complete': True, 'download_url': download_url})}\n\n" + + except Exception as e: + yield f"data: {json.dumps({'error': str(e)})}\n\n" + finally: + conn.close() + + return Response(generate(), mimetype='text/event-stream') + +@main_bp.route('/chart') +@login_required +def chart(): + conn = get_db_connection() + + total_sessions = conn.execute('SELECT COUNT(*) FROM sessions WHERE user_id = ?', (current_user.id,)).fetchone()[0] + total_pdfs = conn.execute('SELECT COUNT(*) FROM generated_pdfs WHERE user_id = ?', (current_user.id,)).fetchone()[0] + + total_questions = conn.execute(""" + SELECT COUNT(q.id) FROM questions q + JOIN sessions s ON q.session_id = s.id + WHERE s.user_id = ? 
+ """, (current_user.id,)).fetchone()[0] + + total_classified_questions = conn.execute(""" + SELECT COUNT(q.id) FROM questions q + JOIN sessions s ON q.session_id = s.id + WHERE s.user_id = ? AND q.subject IS NOT NULL AND q.chapter IS NOT NULL + """, (current_user.id,)).fetchone()[0] + + conn.close() + + return render_template('chart.html', + total_sessions=total_sessions, + total_pdfs=total_pdfs, + total_questions=total_questions, + total_classified_questions=total_classified_questions) + diff --git a/run.py b/run.py new file mode 100644 index 0000000000000000000000000000000000000000..dabd72592f2d6f9ee3772f0f7fc63e019871f417 --- /dev/null +++ b/run.py @@ -0,0 +1,7 @@ +from dotenv import load_dotenv +load_dotenv() # Load environment variables from .env file + +from app import app, socketio + +if __name__ == '__main__': + socketio.run(app, debug=True, port=1302, host='0.0.0.0',allow_unsafe_werkzeug=True) diff --git a/settings_routes.py b/settings_routes.py new file mode 100644 index 0000000000000000000000000000000000000000..42d54474752955f1d5bb8ea50094e968f472b068 --- /dev/null +++ b/settings_routes.py @@ -0,0 +1,88 @@ +from flask import Blueprint, render_template, request, redirect, url_for, flash, current_app +from flask_login import login_required, current_user +from utils import get_db_connection +import os + +settings_bp = Blueprint('settings', __name__) + +@settings_bp.route('/settings', methods=['GET', 'POST']) +@login_required +def settings(): + if request.method == 'POST': + # --- Handle Client Secret Upload --- + if 'client_secret' in request.files: + file = request.files['client_secret'] + if file and file.filename: + if file.filename.endswith('.json'): + try: + save_path = os.path.join(current_app.root_path, 'client_secret.json') + file.save(save_path) + flash('Client Secret uploaded successfully!', 'success') + except Exception as e: + flash(f'Error saving file: {e}', 'danger') + else: + flash('Invalid file type. 
Please upload a JSON file.', 'danger') + + # --- Handle NeetPrep Toggle --- + neetprep_enabled = 1 if request.form.get('neetprep_enabled') else 0 + + # --- Handle V2 Default Toggle --- + v2_default = 1 if request.form.get('v2_default') else 0 + + # --- Handle Magnifier Toggle --- + magnifier_enabled = 1 if request.form.get('magnifier_enabled') else 0 + + # --- Handle Classifier Model Setting --- + classifier_model = request.form.get('classifier_model', 'gemini') + if classifier_model not in ['gemini', 'nova', 'gemma']: + classifier_model = 'gemini' + + # --- Handle DPI Setting --- + dpi_input = request.form.get('dpi') + if not dpi_input: + dpi = 300 + else: + try: + dpi = int(dpi_input) + if not (72 <= dpi <= 900): + flash('Invalid DPI value. Please enter a number between 72 and 900.', 'danger') + return redirect(url_for('settings.settings')) + except (ValueError, TypeError): + flash('Invalid DPI value. Please enter a valid number.', 'danger') + return redirect(url_for('settings.settings')) + + # --- Handle Color RM DPI Setting --- + color_rm_dpi_input = request.form.get('color_rm_dpi') + if not color_rm_dpi_input: + color_rm_dpi = 200 + else: + try: + color_rm_dpi = int(color_rm_dpi_input) + if not (72 <= color_rm_dpi <= 600): + flash('Invalid Color Removal DPI value. Please enter a number between 72 and 600.', 'danger') + return redirect(url_for('settings.settings')) + except (ValueError, TypeError): + flash('Invalid Color Removal DPI value. Please enter a valid number.', 'danger') + return redirect(url_for('settings.settings')) + + # --- Update Database --- + conn = get_db_connection() + conn.execute('UPDATE users SET neetprep_enabled = ?, v2_default = ?, magnifier_enabled = ?, dpi = ?, color_rm_dpi = ?, classifier_model = ? 
WHERE id = ?', + (neetprep_enabled, v2_default, magnifier_enabled, dpi, color_rm_dpi, classifier_model, current_user.id)) + conn.commit() + conn.close() + + # --- Update current_user object for the session --- + current_user.neetprep_enabled = neetprep_enabled + current_user.v2_default = v2_default + current_user.magnifier_enabled = magnifier_enabled + current_user.dpi = dpi + current_user.color_rm_dpi = color_rm_dpi + current_user.classifier_model = classifier_model + + flash('Settings saved successfully!', 'success') + return redirect(url_for('settings.settings')) + + client_secret_exists = os.path.exists(os.path.join(current_app.root_path, 'client_secret.json')) + drive_redirect_uri = url_for('drive.oauth2callback', _external=True) + return render_template('settings.html', client_secret_exists=client_secret_exists, drive_redirect_uri=drive_redirect_uri) diff --git a/speedtest_server.sh b/speedtest_server.sh new file mode 100644 index 0000000000000000000000000000000000000000..a47d1770014b851f9fb9bc339a481831bf31ca08 --- /dev/null +++ b/speedtest_server.sh @@ -0,0 +1,42 @@ +#!/bin/bash +# +# This script creates a large file and starts a simple web server +# to allow you to test the download speed from your server. +# + +FILE_NAME="testfile_100mb" +FILE_SIZE_MB=100 +PORT=8000 + +# Create a 100MB file +echo "Creating a ${FILE_SIZE_MB}MB test file named ${FILE_NAME}..." +dd if=/dev/zero of=${FILE_NAME} bs=1M count=${FILE_SIZE_MB} + +# Get the server's IP address +IP_ADDRESS=$(hostname -I | awk '{print $1}') + +echo "" +echo "------------------------------------------------------------------" +echo " Speed Test Server is running." 
+echo "------------------------------------------------------------------" +echo " Test file: ${FILE_NAME} (${FILE_SIZE_MB}MB)" +echo " Server IP: ${IP_ADDRESS}" +echo " Port: ${PORT}" +echo "" +echo "To test your download speed, open a web browser on your device and go to:" +echo "" +echo " http://${IP_ADDRESS}:${PORT}/${FILE_NAME}" +echo "" +echo "Or use this command in your terminal:" +echo "" +echo " wget http://${IP_ADDRESS}:${PORT}/${FILE_NAME}" +echo "" +echo "Press Ctrl+C to stop the server." +echo "------------------------------------------------------------------" + +# Start the Python web server +python3 -m http.server ${PORT} + +# Clean up the test file after the server is stopped +rm ${FILE_NAME} +echo "Test file removed." diff --git a/strings.py b/strings.py new file mode 100644 index 0000000000000000000000000000000000000000..d6c2e814644a4d0e9d880a508be584c315b4377d --- /dev/null +++ b/strings.py @@ -0,0 +1,55 @@ +# strings.py - Constants and route strings for the Report Generator application + +# Route URLs +ROUTE_INDEX = '/' +ROUTE_INDEX_V2 = '/v2' +ROUTE_IMAGES = '/images' +ROUTE_UPLOAD_PDF = '/upload_pdf' +ROUTE_UPLOAD_IMAGES = '/upload_images' +ROUTE_CROP_V2 = '/cropv2//' +ROUTE_PROCESS_CROP_V2 = '/process_crop_v2' +ROUTE_QUESTION_ENTRY_V2 = '/question_entry_v2/' +ROUTE_DASHBOARD = '/dashboard' +ROUTE_DELETE_SESSION = '/delete_session/' +ROUTE_DELETE_QUESTION = '/delete_question/' +ROUTE_SAVE_QUESTIONS = '/save_questions' +ROUTE_EXTRACT_QUESTION_NUMBER = '/extract_question_number' +ROUTE_EXTRACT_ALL_QUESTION_NUMBERS = '/extract_all_question_numbers' +ROUTE_GENERATE_PDF = '/generate_pdf' +ROUTE_DOWNLOAD = '/download/' +ROUTE_SERVE_IMAGE = '/image//' + +# HTTP Methods +METHOD_GET = 'GET' +METHOD_POST = 'POST' +METHOD_DELETE = 'DELETE' + +# Database constants +DB_SESSIONS_TABLE = 'sessions' +DB_IMAGES_TABLE = 'images' +DB_QUESTIONS_TABLE = 'questions' + +# File types +FILE_TYPE_ORIGINAL = 'original' +FILE_TYPE_CROPPED = 'cropped' + +# Status 
messages +STATUS_SUCCESS = 'success' +STATUS_ERROR = 'error' + +# Error messages +ERROR_NO_PDF_FILE_PART = 'No PDF file part' +ERROR_NO_SELECTED_FILE = 'No selected file' +ERROR_INVALID_FILE_TYPE = 'Invalid file type, please upload a PDF' +ERROR_NO_IMAGE_FILES_PART = 'No image files part' +ERROR_NO_SELECTED_FILES = 'No selected files' +ERROR_INVALID_IMAGE_TYPE = 'Invalid file type. Please upload only image files (PNG, JPG, JPEG, GIF, BMP)' +ERROR_SESSION_NOT_FOUND = 'Session not found' +ERROR_IMAGE_NOT_FOUND = 'Image not found' +ERROR_PROCESSING_FAILED = 'Processing failed' + +# Success messages +SUCCESS_PDF_UPLOADED = 'PDF uploaded successfully' +SUCCESS_IMAGES_UPLOADED = 'Images uploaded successfully' +SUCCESS_QUESTIONS_SAVED = 'Questions saved successfully' +SUCCESS_PDF_GENERATED = 'PDF generated successfully' \ No newline at end of file diff --git a/subjective_routes.py b/subjective_routes.py new file mode 100644 index 0000000000000000000000000000000000000000..0dbd890a5b952c92aeb11c49e77ea4f10f0dd2a3 --- /dev/null +++ b/subjective_routes.py @@ -0,0 +1,485 @@ +from flask import Blueprint, render_template, request, jsonify, redirect, url_for, flash, current_app +from flask_login import login_required, current_user +from database import get_db_connection, get_subjective_folder_tree, get_all_descendant_folder_ids +from gemini_subjective import generate_subjective_questions +from werkzeug.utils import secure_filename +import json +import os +import re # Import the regular expression module + +subjective_bp = Blueprint('subjective', __name__) + +# Helper function for natural sorting +def natural_sort_key(s): + if s is None: + return (0, "") # Treat None as 0 and empty string for comparison + return [int(text) if text.isdigit() else text.lower() + for text in re.split('([0-9]+)', str(s))] + +@subjective_bp.route('/subjective_generator', methods=['GET']) +@login_required +def generator(): + return render_template('subjective_generator.html') + 
def _subjective_json_payload():
    """Return the request's JSON body as a dict.

    An absent, unparsable, or non-object body yields an empty dict so that
    the per-field validation in each view can answer with a JSON 400 instead
    of failing on ``None.get``.
    """
    payload = request.get_json(silent=True)
    return payload if isinstance(payload, dict) else {}


def _subjective_user_owns_folder(conn, folder_id):
    """Return True when ``folder_id`` names a folder owned by the current user."""
    row = conn.execute(
        'SELECT user_id FROM subjective_folders WHERE id = ?', (folder_id,)
    ).fetchone()
    return row is not None and row['user_id'] == current_user.id


def _subjective_group_by_topic(questions_rows):
    """Group question dicts by topic, ordered for display.

    Topics are ordered by the ``topic_order`` of the first question seen for
    that topic (missing/None counts as 0); questions inside a topic are
    ordered with ``natural_sort_key`` on ``question_number_within_topic``.

    Returns:
        dict mapping topic name -> list of question dicts, in display order.
    """
    buckets = {}
    topic_orders = {}
    for question in questions_rows:
        topic = question['question_topic']
        if topic not in buckets:
            buckets[topic] = []
            # The first row of a topic is taken as representative of its order.
            topic_orders[topic] = question.get('topic_order') or 0
        buckets[topic].append(question)

    return {
        topic: sorted(
            buckets[topic],
            key=lambda q: natural_sort_key(q.get('question_number_within_topic')),
        )
        for topic in sorted(topic_orders, key=topic_orders.get)
    }


@subjective_bp.route('/generate_subjective', methods=['POST'])
@login_required
def generate():
    """Build the results page from pasted JSON or from an uploaded image.

    Form fields:
        json_data: optional JSON text; a list of question objects, a dict
            with a ``data`` list, or a single question object.
        image: optional uploaded file passed to
            ``generate_subjective_questions``.

    JSON input takes precedence over the image. On any failure the user is
    redirected back to the generator page with a flash message.
    """
    import uuid  # local import: only needed for the temp-file name

    json_input = request.form.get('json_data')
    file = request.files.get('image')
    generator_url = url_for('subjective.generator')

    result = None

    if json_input and json_input.strip():
        try:
            parsed_data = json.loads(json_input)
        except json.JSONDecodeError:
            flash('Invalid JSON syntax.', 'danger')
            return redirect(generator_url)

        if isinstance(parsed_data, dict) and 'data' in parsed_data:
            parsed_data = parsed_data['data']
        elif isinstance(parsed_data, dict) and 'question_topic' in parsed_data:
            # A single question object is accepted and wrapped in a list.
            parsed_data = [parsed_data]

        # Everything below calls .get() on each entry, so reject anything
        # that is not a list of objects up front.
        if not isinstance(parsed_data, list) or not all(
            isinstance(entry, dict) for entry in parsed_data
        ):
            flash('Invalid JSON: Expected a list of question objects.', 'danger')
            return redirect(generator_url)

        result = {'success': True, 'data': parsed_data}

    elif file and file.filename != '':
        filename = secure_filename(file.filename)
        # secure_filename() may return '' and two users may upload the same
        # name at once; a unique prefix keeps the path valid and private.
        temp_path = os.path.join(
            current_app.config['TEMP_FOLDER'], f'{uuid.uuid4().hex}_{filename}'
        )
        try:
            file.save(temp_path)
            result = generate_subjective_questions(temp_path)
        finally:
            # Best-effort cleanup, also when saving or generation raised.
            try:
                os.remove(temp_path)
            except OSError:
                pass
    else:
        flash('Please upload an image or provide JSON data.', 'danger')
        return redirect(generator_url)

    if not (result and result.get('success')):
        flash('Failed to generate questions. Please try again.', 'danger')
        return redirect(generator_url)

    questions = result.get('data') or []

    grouped_questions = {}
    for question in questions:
        topic = question.get('question_topic', 'Uncategorized')
        grouped_questions.setdefault(topic, []).append(question)

    # Sort questions within each topic group by question_number_within_topic
    for topic_questions in grouped_questions.values():
        topic_questions.sort(
            key=lambda q: natural_sort_key(q.get('question_number_within_topic'))
        )

    # Use the topic of the first question as a general page title.
    page_title_topic = "Extracted Questions"
    if questions:
        page_title_topic = questions[0].get('question_topic', "Extracted Questions")

    return render_template(
        'subjective_results.html',
        grouped_questions=grouped_questions,
        topic=page_title_topic,
    )


@subjective_bp.route('/save_subjective', methods=['POST'])
@login_required
def save():
    """Persist a batch of generated questions for the current user.

    JSON body:
        questions: list of objects with ``question_topic``, ``question_html``
            and ``question_number_within_topic``.
        folder_id: optional folder to save into (root when omitted).

    Returns a JSON object with ``success`` and either ``redirect_url`` or
    ``message``.
    """
    data = _subjective_json_payload() if request.is_json else {}
    questions = data.get('questions', [])
    target_folder_id = data.get('folder_id')  # Optional: save directly to a folder

    if not questions:
        return jsonify({'success': False, 'message': 'No questions to save.'}), 400

    required_keys = ('question_topic', 'question_html', 'question_number_within_topic')
    if not isinstance(questions, list) or not all(
        isinstance(q, dict) and all(key in q for key in required_keys)
        for q in questions
    ):
        return jsonify({'success': False, 'message': 'Malformed question data.'}), 400

    conn = get_db_connection()
    try:
        if target_folder_id and not _subjective_user_owns_folder(conn, target_folder_id):
            return jsonify({'success': False, 'message': 'Unauthorized target folder'}), 403

        for q in questions:
            conn.execute('''
                INSERT INTO subjective_questions (user_id, question_topic, question_html, question_number_within_topic, folder_id)
                VALUES (?, ?, ?, ?, ?)
            ''', (current_user.id, q['question_topic'], q['question_html'], q['question_number_within_topic'], target_folder_id))
        conn.commit()
        flash(f'{len(questions)} questions saved successfully!', 'success')
        return jsonify({'success': True, 'redirect_url': url_for('subjective.list_questions')})
    except Exception as e:
        conn.rollback()
        return jsonify({'success': False, 'message': str(e)}), 500
    finally:
        conn.close()


@subjective_bp.route('/subjective_list')
@subjective_bp.route('/subjective_list/<path:folder_path>')
@login_required
def list_questions(folder_path=''):
    """Show the folders and questions at ``folder_path`` for the current user.

    Args:
        folder_path: slash-separated folder names from the root; empty for
            the root listing.

    An unknown folder redirects to the root listing with a flash message.
    """
    conn = get_db_connection()
    try:
        # Folder Navigation Logic
        folder_id = None
        breadcrumbs = []

        # Drop empty segments so a trailing or doubled slash still resolves.
        parts = [part for part in folder_path.split('/') if part]
        for i, part in enumerate(parts):
            res = conn.execute(
                "SELECT id FROM subjective_folders WHERE name = ? AND user_id = ? "
                "AND (parent_id = ? OR (? IS NULL AND parent_id IS NULL))",
                (part, current_user.id, folder_id, folder_id),
            ).fetchone()
            if not res:
                flash('Folder not found.', 'danger')
                return redirect(url_for('subjective.list_questions'))
            folder_id = res['id']
            breadcrumbs.append({'name': part, 'path': '/'.join(parts[:i + 1])})

        # Fetch subfolders and questions of the resolved folder (or the root)
        if folder_id is not None:
            subfolders = conn.execute('SELECT * FROM subjective_folders WHERE parent_id = ? AND user_id = ? ORDER BY name', (folder_id, current_user.id)).fetchall()
            questions_rows = conn.execute('SELECT * FROM subjective_questions WHERE folder_id = ? AND user_id = ? ORDER BY created_at DESC', (folder_id, current_user.id)).fetchall()
        else:
            subfolders = conn.execute('SELECT * FROM subjective_folders WHERE parent_id IS NULL AND user_id = ? ORDER BY name', (current_user.id,)).fetchall()
            questions_rows = conn.execute('SELECT * FROM subjective_questions WHERE folder_id IS NULL AND user_id = ? ORDER BY created_at DESC', (current_user.id,)).fetchall()
    finally:
        conn.close()

    # Convert to dicts to ensure template compatibility
    subfolders = [dict(row) for row in subfolders]
    questions_rows = [dict(row) for row in questions_rows]

    grouped_questions = _subjective_group_by_topic(questions_rows)
    folder_tree = get_subjective_folder_tree(current_user.id)

    return render_template(
        'subjective_list.html',
        grouped_questions=grouped_questions,
        subfolders=subfolders,
        breadcrumbs=breadcrumbs,
        current_folder_id=folder_id,
        folder_tree=folder_tree
    )


@subjective_bp.route('/subjective/print/<folder_id>')
@login_required
def print_folder(folder_id):
    """Render the print view for one folder.

    Args:
        folder_id: folder id from the URL; the literal ``'root'`` selects the
            questions that are not in any folder.
    """
    conn = get_db_connection()

    try:
        target_folder_id = None
        if folder_id and folder_id != 'root':
            target_folder_id = folder_id

        # Fetch questions for the specific folder
        if target_folder_id:
            questions_rows = conn.execute('SELECT * FROM subjective_questions WHERE folder_id = ? AND user_id = ? ORDER BY created_at DESC', (target_folder_id, current_user.id)).fetchall()
        else:
            questions_rows = conn.execute('SELECT * FROM subjective_questions WHERE folder_id IS NULL AND user_id = ? ORDER BY created_at DESC', (current_user.id,)).fetchall()

        grouped_questions = _subjective_group_by_topic([dict(row) for row in questions_rows])

        return render_template('subjective_print.html', grouped_questions=grouped_questions)

    except Exception as e:
        flash(f'Error preparing print view: {str(e)}', 'danger')
        return redirect(url_for('subjective.list_questions'))
    finally:
        conn.close()


@subjective_bp.route('/subjective/question/add', methods=['POST'])
@login_required
def add_subjective_question():
    """Create one question from a JSON body.

    JSON body: ``topic`` and ``html`` (required), ``json``, ``number``,
    ``folder_id`` (optional).
    """
    data = _subjective_json_payload()
    topic = data.get('topic')
    html = data.get('html')
    json_data = data.get('json')
    number = data.get('number')
    folder_id = data.get('folder_id')

    if not topic or not html:
        return jsonify({'error': 'Topic and Question are required'}), 400

    conn = get_db_connection()
    try:
        if folder_id and not _subjective_user_owns_folder(conn, folder_id):
            return jsonify({'error': 'Unauthorized target folder'}), 403

        conn.execute(
            'INSERT INTO subjective_questions (user_id, question_topic, question_html, question_json, question_number_within_topic, folder_id) VALUES (?, ?, ?, ?, ?, ?)',
            (current_user.id, topic, html, json_data, number, folder_id)
        )
        conn.commit()
        return jsonify({'success': True})
    except Exception as e:
        conn.rollback()
        return jsonify({'error': str(e)}), 500
    finally:
        conn.close()


@subjective_bp.route('/subjective/question/update/<int:question_id>', methods=['POST'])
@login_required
def update_subjective_question(question_id):
    """Update topic, HTML, JSON and number of a question the user owns."""
    data = _subjective_json_payload()
    topic = data.get('topic')
    html = data.get('html')
    json_data = data.get('json')
    number = data.get('number')

    if not topic or not html:
        return jsonify({'error': 'Topic and Question are required'}), 400

    conn = get_db_connection()
    try:
        # Check ownership (the connection is closed by the finally clause)
        owner = conn.execute('SELECT user_id FROM subjective_questions WHERE id = ?', (question_id,)).fetchone()
        if not owner or owner['user_id'] != current_user.id:
            return jsonify({'error': 'Unauthorized'}), 403

        conn.execute(
            'UPDATE subjective_questions SET question_topic = ?, question_html = ?, question_json = ?, question_number_within_topic = ? WHERE id = ?',
            (topic, html, json_data, number, question_id)
        )
        conn.commit()
        return jsonify({'success': True})
    except Exception as e:
        conn.rollback()
        return jsonify({'error': str(e)}), 500
    finally:
        conn.close()


@subjective_bp.route('/subjective/question/delete/<int:question_id>', methods=['DELETE'])
@login_required
def delete_subjective_question(question_id):
    """Delete a question the current user owns."""
    conn = get_db_connection()
    try:
        # Check ownership (the connection is closed by the finally clause)
        owner = conn.execute('SELECT user_id FROM subjective_questions WHERE id = ?', (question_id,)).fetchone()
        if not owner or owner['user_id'] != current_user.id:
            return jsonify({'error': 'Unauthorized'}), 403

        conn.execute('DELETE FROM subjective_questions WHERE id = ?', (question_id,))
        conn.commit()
        return jsonify({'success': True})
    except Exception as e:
        conn.rollback()
        return jsonify({'error': str(e)}), 500
    finally:
        conn.close()


@subjective_bp.route('/subjective/topic/rename', methods=['POST'])
@login_required
def rename_subjective_topic():
    """Rename a topic within one folder (or the root) for the current user.

    JSON body: ``old_topic``, ``new_topic`` (required), ``folder_id``
    (optional; root when falsy).
    """
    data = _subjective_json_payload()
    old_topic = data.get('old_topic')
    new_topic = data.get('new_topic')
    folder_id = data.get('folder_id')

    if not old_topic or not new_topic:
        return jsonify({'error': 'Topic names required'}), 400

    conn = get_db_connection()
    try:
        # Scope update to folder or root
        if folder_id:
            conn.execute(
                'UPDATE subjective_questions SET question_topic = ? WHERE question_topic = ? AND folder_id = ? AND user_id = ?',
                (new_topic, old_topic, folder_id, current_user.id)
            )
        else:
            conn.execute(
                'UPDATE subjective_questions SET question_topic = ? WHERE question_topic = ? AND folder_id IS NULL AND user_id = ?',
                (new_topic, old_topic, current_user.id)
            )
        conn.commit()
        return jsonify({'success': True})
    except Exception as e:
        conn.rollback()
        return jsonify({'error': str(e)}), 500
    finally:
        conn.close()


@subjective_bp.route('/subjective/topic/delete', methods=['POST'])
@login_required
def delete_subjective_topic():
    """Delete every question of a topic within one folder (or the root).

    JSON body: ``topic`` (required), ``folder_id`` (optional; root when falsy).
    """
    data = _subjective_json_payload()
    topic = data.get('topic')
    folder_id = data.get('folder_id')

    if not topic:
        return jsonify({'error': 'Topic name required'}), 400

    conn = get_db_connection()
    try:
        if folder_id:
            conn.execute(
                'DELETE FROM subjective_questions WHERE question_topic = ? AND folder_id = ? AND user_id = ?',
                (topic, folder_id, current_user.id)
            )
        else:
            conn.execute(
                'DELETE FROM subjective_questions WHERE question_topic = ? AND folder_id IS NULL AND user_id = ?',
                (topic, current_user.id)
            )
        conn.commit()
        return jsonify({'success': True})
    except Exception as e:
        conn.rollback()
        return jsonify({'error': str(e)}), 500
    finally:
        conn.close()


@subjective_bp.route('/subjective/topic/reorder', methods=['POST'])
@login_required
def reorder_subjective_topics():
    """Store the display order of topics within one folder (or the root).

    JSON body: ``topic_order`` (list of topic names, first shown first),
    ``folder_id`` (optional; root when falsy).
    """
    data = _subjective_json_payload()
    topic_order = data.get('topic_order', [])
    folder_id = data.get('folder_id')

    if not topic_order or not isinstance(topic_order, list):
        return jsonify({'error': 'Topic order list required'}), 400

    conn = get_db_connection()
    try:
        # The list position is stored as topic_order: lower numbers sort first.
        for index, topic in enumerate(topic_order):
            if folder_id:
                conn.execute(
                    'UPDATE subjective_questions SET topic_order = ? WHERE question_topic = ? AND folder_id = ? AND user_id = ?',
                    (index, topic, folder_id, current_user.id)
                )
            else:
                conn.execute(
                    'UPDATE subjective_questions SET topic_order = ? WHERE question_topic = ? AND folder_id IS NULL AND user_id = ?',
                    (index, topic, current_user.id)
                )
        conn.commit()
        return jsonify({'success': True})
    except Exception as e:
        conn.rollback()
        return jsonify({'error': str(e)}), 500
    finally:
        conn.close()


@subjective_bp.route('/subjective/create_folder', methods=['POST'])
@login_required
def create_folder():
    """Create a folder for the current user.

    JSON body: ``name`` (required), ``parent_id`` (optional; root when falsy).
    Returns the new folder's ``id``, ``name`` and ``parent_id``.
    """
    data = _subjective_json_payload()
    name = data.get('name')
    parent_id = data.get('parent_id')

    if isinstance(name, str):
        name = name.strip()
    if not name or not isinstance(name, str):
        return jsonify({'error': 'Folder name is required'}), 400
    # list_questions() resolves folders by splitting the URL path on '/',
    # so a name containing '/' could never be opened again.
    if '/' in name:
        return jsonify({'error': 'Folder name cannot contain "/"'}), 400

    conn = get_db_connection()
    try:
        if parent_id and not _subjective_user_owns_folder(conn, parent_id):
            return jsonify({'error': 'Unauthorized parent folder'}), 403

        cursor = conn.cursor()
        cursor.execute('INSERT INTO subjective_folders (name, parent_id, user_id) VALUES (?, ?, ?)', (name, parent_id, current_user.id))
        new_id = cursor.lastrowid
        conn.commit()
        return jsonify({'success': True, 'id': new_id, 'name': name, 'parent_id': parent_id})
    except Exception as e:
        conn.rollback()
        return jsonify({'error': str(e)}), 500
    finally:
        conn.close()


@subjective_bp.route('/subjective/move_items', methods=['POST'])
@login_required
def move_items():
    """Move questions and/or folders into a target folder (or the root).

    JSON body:
        question_ids: list of question ids to move.
        folder_ids: list of folder ids to move.
        target_folder_id: destination folder; falsy means the root.

    All validation happens before any row is changed. A folder cannot be
    moved into itself or into one of its own descendants.
    """
    data = _subjective_json_payload()
    question_ids = data.get('question_ids') or []
    folder_ids = data.get('folder_ids') or []
    target_folder_id = data.get('target_folder_id')

    if not question_ids and not folder_ids:
        return jsonify({'error': 'No items selected'}), 400

    try:
        if not isinstance(question_ids, list) or not isinstance(folder_ids, list):
            raise TypeError('id collections must be lists')
        question_ids = [int(qid) for qid in question_ids]
        folder_ids = [int(fid) for fid in folder_ids]
        # Any falsy target ('' / 0 / None) means "move to root".
        target_folder_id = int(target_folder_id) if target_folder_id else None
    except (TypeError, ValueError):
        return jsonify({'error': 'Invalid item or folder id'}), 400

    conn = get_db_connection()
    try:
        if target_folder_id is not None:
            # Verify target folder ownership if not root
            if not _subjective_user_owns_folder(conn, target_folder_id):
                return jsonify({'error': 'Unauthorized target folder'}), 403

            if folder_ids:
                # Walk from the target up to the root; meeting a folder that
                # is being moved means the move would create a cycle.
                moving = set(folder_ids)
                visited = set()
                ancestor = target_folder_id
                while ancestor is not None and ancestor not in visited:
                    if ancestor in moving:
                        if ancestor == target_folder_id:
                            message = 'Cannot move a folder into itself.'
                        else:
                            message = 'Cannot move a folder into one of its own subfolders.'
                        return jsonify({'error': message}), 400
                    visited.add(ancestor)
                    row = conn.execute(
                        'SELECT parent_id FROM subjective_folders WHERE id = ? AND user_id = ?',
                        (ancestor, current_user.id),
                    ).fetchone()
                    try:
                        ancestor = int(row['parent_id']) if row and row['parent_id'] is not None else None
                    except (TypeError, ValueError):
                        ancestor = None  # unparsable parent: treat as root

        # Move Questions
        if question_ids:
            placeholders = ', '.join('?' * len(question_ids))
            conn.execute(f'UPDATE subjective_questions SET folder_id = ? WHERE id IN ({placeholders}) AND user_id = ?', (target_folder_id, *question_ids, current_user.id))

        # Move Folders
        if folder_ids:
            placeholders = ', '.join('?' * len(folder_ids))
            conn.execute(f'UPDATE subjective_folders SET parent_id = ? WHERE id IN ({placeholders}) AND user_id = ?', (target_folder_id, *folder_ids, current_user.id))

        conn.commit()
        return jsonify({'success': True})
    except Exception as e:
        conn.rollback()
        return jsonify({'error': str(e)}), 500
    finally:
        conn.close()

# diff --git a/templates/_header.html b/templates/_header.html new file mode 100644 index 0000000000000000000000000000000000000000..67d1745188ef3553cc1ca7221bcb9c3e9211dbfd --- /dev/null +++ b/templates/_header.html @@ -0,0 +1,9 @@
+

+ + {{ title | default('DocuPDF') }} +

+
+ {% block header_actions %}{% endblock %} +
+
\ No newline at end of file diff --git a/templates/_nav_links.html b/templates/_nav_links.html new file mode 100644 index 0000000000000000000000000000000000000000..de0a6b020237af9549e108de26c15e105667c933 --- /dev/null +++ b/templates/_nav_links.html @@ -0,0 +1,45 @@ + diff --git a/templates/_navbar.html b/templates/_navbar.html new file mode 100644 index 0000000000000000000000000000000000000000..e7b54132c6391ef4095aef4c39d8d245f86596b0 --- /dev/null +++ b/templates/_navbar.html @@ -0,0 +1,64 @@ + + + + diff --git a/templates/_scripts.html b/templates/_scripts.html new file mode 100644 index 0000000000000000000000000000000000000000..063d02b34686a8cc46453bd166c464dcd425d4b1 --- /dev/null +++ b/templates/_scripts.html @@ -0,0 +1 @@ +{# Common scripts can be placed here #} \ No newline at end of file diff --git a/templates/base.html b/templates/base.html new file mode 100644 index 0000000000000000000000000000000000000000..0f74bca110ac1924c05f57f793fd437e65f3ccee --- /dev/null +++ b/templates/base.html @@ -0,0 +1,46 @@ + + + + + + {% block title %}DocuPDF{% endblock %} + {% block styles %}{% endblock %} + + + + + + {% block head %}{% endblock %} + + + {% include '_navbar.html' %} +
+ {% block content %}{% endblock %} +
+ + + + {% block scripts %}{% endblock %} + + \ No newline at end of file diff --git a/templates/camera_mobile.html b/templates/camera_mobile.html new file mode 100644 index 0000000000000000000000000000000000000000..c2d64209af49d596cfb954b3792a9929852e250b --- /dev/null +++ b/templates/camera_mobile.html @@ -0,0 +1,220 @@ +{% extends "base.html" %} +{% block title %}Mobile Camera Feed{% endblock %} +{% block head %} + + +{% endblock %} +{% block content %} +
+
+ +
+ +
+ + +
+ +
+
+{% endblock %} +{% block scripts %} + +{% endblock %} diff --git a/templates/camera_receiver_component.html b/templates/camera_receiver_component.html new file mode 100644 index 0000000000000000000000000000000000000000..65938855c6e5ae08ec076dd0fc8799b934f2b085 --- /dev/null +++ b/templates/camera_receiver_component.html @@ -0,0 +1,142 @@ + + + + diff --git a/templates/camera_web.html b/templates/camera_web.html new file mode 100644 index 0000000000000000000000000000000000000000..5d4fb7a188fa9eafbe302688aa3f7b2f71b84132 --- /dev/null +++ b/templates/camera_web.html @@ -0,0 +1,74 @@ +{% extends "base.html" %} +{% block title %}Web Camera Feed{% endblock %} +{% block head %} + +{% endblock %} +{% block content %} +
+

Live Camera Feed (Web Receiver)

+
+ + +
+
+{% endblock %} +{% block scripts %} + +{% endblock %} diff --git a/templates/chart.html b/templates/chart.html new file mode 100644 index 0000000000000000000000000000000000000000..d2b052228f459aa130734ccc273768de31715937 --- /dev/null +++ b/templates/chart.html @@ -0,0 +1,53 @@ +{% extends "base.html" %} + +{% block title %}User Statistics{% endblock %} + +{% block content %} +
+
+
+

Your Application Statistics

+
+
+
+
+
+
+ +
Total Sessions
+

{{ total_sessions }}

+
+
+
+
+
+
+ +
Total PDFs Generated
+

{{ total_pdfs }}

+
+
+
+
+
+
+ +
Total Questions
+

{{ total_questions }}

+
+
+
+
+
+
+ +
Total Classified Questions
+

{{ total_classified_questions }}

+
+
+
+
+
+
+
+{% endblock %} diff --git a/templates/classified_edit.html b/templates/classified_edit.html new file mode 100644 index 0000000000000000000000000000000000000000..2455717cd7f380fa675beb3fff6693d3a45c0c1a --- /dev/null +++ b/templates/classified_edit.html @@ -0,0 +1,278 @@ +{% extends "base.html" %} + +{% block title %}Edit Classified Questions{% endblock %} + +{% block content %} +
+
+
+

Edit Classified Questions

+ Back to NeetPrep Home +
+
+ +
+
+ +
+
+ +
+
+ + +
+ Filter by Tag: + + + {% for tag in all_tags %} + + {% endfor %} + +
+ + +
+ +
+ + +
+ + + + + + + + + + + + + {% for q in questions %} + + + + + + + + + {% endfor %} + +
Question TextChapterSubjectTagsAction
{{ q.question_text_plain }}{{ q.chapter }}{{ q.subject }}{{ q.tags }} + + +
+
+
+
+
+ + + +{% endblock %} + +{% block scripts %} + +{% endblock %} diff --git a/templates/color_rm.html b/templates/color_rm.html new file mode 100644 index 0000000000000000000000000000000000000000..8728c397b076ea8a57ec1af2f3211f0cd71ea85f --- /dev/null +++ b/templates/color_rm.html @@ -0,0 +1,1035 @@ + + + + + + Color Removal Tool ({{ image_index + 1 }} / {{ total_pages }}) + + + + + + +
+
Loading...
+
+ +
+
+

Color Remove

+
+ + +
+ Pg + + / {{ total_pages }} +
+ + +
+ + + +
+ +
+
+ +
+
+
+ +
+
+
+
+
+
+
+
+
+
+
+
+ +
+
+
+ Click on the image to select colors to keep. All other colors will be removed. +
Now using LAB Color Space for human-like perception. +
+ +
+ +
+ + +
+
+ + +
+
+ +
Selected Colors
+
+ No colors selected +
+ +
+ +
Settings
+ +
+ + + +
+ Loose + Strict +
+
+ +
+ +
+ + + + + + + + +
+
+ +
+ + +
+
+
+
+
+ + + + + diff --git a/templates/color_rm_upload.html b/templates/color_rm_upload.html new file mode 100644 index 0000000000000000000000000000000000000000..c539519448a74a28b753e89dd0ca68f8bb966955 --- /dev/null +++ b/templates/color_rm_upload.html @@ -0,0 +1,391 @@ +{% extends "base.html" %} + +{% block title %}Color Removal Tool{% endblock %} + +{% block head %} + + + +{% endblock %} + +{% block content %} +
+
+ + +
+
+

Color Removal Tool

+

Upload an image to start removing colors

+
+
+ +
+ + {% include 'camera_receiver_component.html' %} + +
+ + +
+
+ + +
+
+ + Tap to select file + Support for PDF, PNG, JPG + + + +
+
+ + + +
+ +
+ + + +
+
+
+{% endblock %} + +{% block scripts %} +{% endblock %} \ No newline at end of file diff --git a/templates/crop.html b/templates/crop.html new file mode 100644 index 0000000000000000000000000000000000000000..1028d75ecefe21e17d66f05fb5c2056a6721e6cc --- /dev/null +++ b/templates/crop.html @@ -0,0 +1,346 @@ + + + + + + Adjust & Enhance + + + + + +
+
Adjust & Enhance
+
+
+
+ Image to adjust + +
+
+
+
+
Quick Presets
+
+
+
Manual Adjustments
+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + + diff --git a/templates/cropv2.html b/templates/cropv2.html new file mode 100644 index 0000000000000000000000000000000000000000..0f68f00c24a31440d2edcbeb91c4b0b1f536af3b --- /dev/null +++ b/templates/cropv2.html @@ -0,0 +1,687 @@ + + + + + + Crop Page {{ image_index + 1 }} + + + + + + {% include '_navbar.html' %} + +
+

Page {{ image_index + 1 }} / {{ total_pages }}

+
+ + + +
+
+ +
+
+
+ Page +
+ + +
+ + + + +
+
+ + +
+
+
Select a box to edit
+
+
+
+ +
+
+
+
+
+
+
+ + +
+
+
+
+
+ +
+
+ +
+ {% for page in all_pages %} +
+ Page {{ page.image_index + 1 }} +
{{ page.image_index + 1 }}
+
+ {% endfor %} +
+
+ +
+
+ + + + + diff --git a/templates/cropv2.html.bak b/templates/cropv2.html.bak new file mode 100644 index 0000000000000000000000000000000000000000..29ee690fd7da6625ae9d4603ae09b0c1ab639728 --- /dev/null +++ b/templates/cropv2.html.bak @@ -0,0 +1,511 @@ +{% extends "base.html" %} + +{% block title %}Step 2: Draw Boxes ({{ image_index + 1 }} / {{ total_pages }}){% endblock %} + +{% block head %} + +{% endblock %} + +{% block content %} +
+
+ Loading... +
+
+
+
+
+

Page {{ image_index + 1 }} of {{ total_pages }}

+ +
+
+ + + + +
+
+
+
+ Progress: {{ image_index + 1 }} of {{ total_pages }} pages + {{ total_pages - image_index - 1 }} pages left +
+
+
+
+
+
+
+
+ PDF Page + +
+ + + +
+
+
+
+
+
Click and drag to draw boxes. Click a box to select it. Drag corners to resize.

+
Page Adjustments
+
+
+
+
+
+
+
+
+
+ {% for i in range(total_pages) %} + {{ i + 1 }} + {% endfor %} +
+
+
+{% endblock %} + +{% block scripts %} + +{% endblock %} \ No newline at end of file diff --git a/templates/dashboard.html b/templates/dashboard.html new file mode 100644 index 0000000000000000000000000000000000000000..5aa72876e1dcf110c263676c8d330e6359cb2d3c --- /dev/null +++ b/templates/dashboard.html @@ -0,0 +1,336 @@ + +{% extends "base.html" %} + +{% block title %}Session Dashboard{% endblock %} + +{% block head %} + +{% endblock %} + +{% block content %} +
+
+

Session Dashboard

+ +
+ +
+ + + + + + + + + + {% if show_size %} + + {% endif %} + + + + + + {% for session in sessions %} + + + + + + + + {% if show_size %} + + {% endif %} + + + + {% else %} + {% if show_size %} + + + + {% else %} + + + + {% endif %} + {% endfor %} + +
S.No.NameCreated AtPagesQuestionsSizeStatusActions
{{ loop.index }} + {{ session.name or session.original_filename }} + + + {{ session.created_at | humanize }}{{ session.page_count }}{{ session.question_count }}{{ session.total_size_formatted }} + {% if session.persist %} + Persisted + {% else %} + Not Persisted + {% endif %} + +
+ {% if session.session_type == 'color_rm' %} + Color RM + + {% if show_size %} + + {% endif %} + {% else %} + View + Crop + + {% if show_size %} + + {% endif %} + {% endif %} +
+
No sessions found.
No sessions found.
+
+
+{% endblock %} + +{% block scripts %} +{{ super() }} + +{% endblock %} diff --git a/templates/drive_browser.html b/templates/drive_browser.html new file mode 100644 index 0000000000000000000000000000000000000000..3d62c44757f061b19d09421eee07e08cb6c29084 --- /dev/null +++ b/templates/drive_browser.html @@ -0,0 +1,219 @@ +{% extends "base.html" %} + +{% block title %}Browse Drive{% endblock %} + +{% block styles %} + +{% endblock %} + +{% block content %} +
+ + +
+ + +
+
+ + +
+
+
+ + +
+
+ + +
+ +
+ + +
+ {% for item in items %} +
+ {% if item.type == 'folder' %} + {% set link = '/drive/api/browse/' + item.path if is_api else '/drive/browse/' + source.id|string + '/' + item.path %} + {% set icon = 'bi-folder-fill text-warning' %} + {% else %} + {% set link = '/drive/api/open/' + item.path if is_api else '/drive/file/' + source.id|string + '/' + item.path %} + {% if item.type == 'pdf' %} + {% set icon = 'bi-file-earmark-pdf-fill text-danger' %} + {% else %} + {% set icon = 'bi-file-earmark-image-fill text-info' %} + {% endif %} + {% endif %} + +
+ +
+ +
{{ item.name }}
+
+
+
+ {% else %} +
+

Empty folder or not synced yet.

+
+ {% endfor %} +
+ + +
+ + + + + + + + + + + {% for item in items %} + {% if item.type == 'folder' %} + {% set link = '/drive/api/browse/' + item.path if is_api else '/drive/browse/' + source.id|string + '/' + item.path %} + {% set icon = 'bi-folder-fill text-warning' %} + {% else %} + {% set link = '/drive/api/open/' + item.path if is_api else '/drive/file/' + source.id|string + '/' + item.path %} + {% if item.type == 'pdf' %} + {% set icon = 'bi-file-earmark-pdf-fill text-danger' %} + {% else %} + {% set icon = 'bi-file-earmark-image-fill text-info' %} + {% endif %} + {% endif %} + + + + + + + + {% endfor %} + +
TypeNameAction
+ {{ item.name }} + + +
+
+
+ + + + + +{% endblock %} \ No newline at end of file diff --git a/templates/drive_connect_manual.html b/templates/drive_connect_manual.html new file mode 100644 index 0000000000000000000000000000000000000000..0d1b90777bdc619071bfdcd35239d88696136aa6 --- /dev/null +++ b/templates/drive_connect_manual.html @@ -0,0 +1,51 @@ +{% extends "base.html" %} + +{% block title %}Connect Google Drive{% endblock %} + +{% block content %} +
+
+
+
+
+

Connect Google Drive (Manual Mode)

+
+
+
+ + Since this app is running on a server without a public domain, we use a manual copy-paste method for authentication. +
+ +
Step 1: Authorize
+

Click the button below to open Google Login in a new tab. You will be asked to allow access.

+ + Authorize on Google + + +
Step 2: Copy the Result
+

+ After authorizing, you might be redirected to a page that says "This site can't be reached" or "localhost refused to connect". +
+ This is expected! Copy the entire URL from your browser's address bar (it should look like http://localhost/?code=...). +

+ +
Step 3: Paste & Verify
+
+
+ + +
+ +
+
+
+ +
+ Cancel +
+
+
+
+{% endblock %} diff --git a/templates/drive_manager.html b/templates/drive_manager.html new file mode 100644 index 0000000000000000000000000000000000000000..f706d669f3321ea7620f2cc9ec706680c084eb07 --- /dev/null +++ b/templates/drive_manager.html @@ -0,0 +1,213 @@ +{% extends "base.html" %} + +{% block title %}Drive Manager{% endblock %} + +{% block styles %} + +{% endblock %} + +{% block content %} +
+
+

Drive Sync Manager

+
+ {% if not drive_connected %} + + Connect Drive + + {% endif %} + +
+
+ + {% if recent_pdfs %} +
+
+
Recently Opened PDFs
+
+
+
+ {% for pdf in recent_pdfs %} +
+
+
+
+
+ {{ pdf.filename }} +
+
+
+ + {{ pdf.opened_at }} + +
+
+
+
+ {% endfor %} +
+
+
+ {% endif %} + + {% if drive_connected %} +
+
+
+
My Drive
+ Browse your personal Google Drive (API) +
+ +
+
+ {% endif %} + +
+ {% for source in sources %} +
+
+
+
+
+ {% if source.source_type == 'file' %} + + {% else %} + + {% endif %} + {{ source.name }} +
+ +
+

{{ source.url }}

+
+ + Last Synced: {{ source.last_synced or 'Never' }} + +
+
+
+
+ {% else %} +
+ +

No drive sources added yet.

+
+ {% endfor %} +
+
+ + + + +{% endblock %} + +{% block scripts %} + +{% endblock %} diff --git a/templates/image_upload.html b/templates/image_upload.html new file mode 100644 index 0000000000000000000000000000000000000000..fed90cafdb4e5240c2167c862ed51fe13644ccb4 --- /dev/null +++ b/templates/image_upload.html @@ -0,0 +1,312 @@ +{% extends "base.html" %} + +{% block title %}Upload Images{% endblock %} + +{% block head %} + + + +{% endblock %} + +{% block content %} +
+
+ + +
+
+

Upload Images

+

Select pages or screenshots to analyze

+
+
+ +
+ + {% include 'camera_receiver_component.html' %} + +
+ + +
+
+ + +
+
+ + Tap to select images + Support for PNG, JPG, JPEG + + + +
+
+ + + +
+ +
+ + + +
+
+
+{% endblock %} + +{% block scripts %} + +{% endblock %} diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000000000000000000000000000000000000..9252e02ab25c9fd15d6ec779e6efa573ac8a8f7c --- /dev/null +++ b/templates/index.html @@ -0,0 +1,258 @@ + + + + + + DocuPDF - Upload + + + + + + +
+
+
+

Step 1: Select Your Questions

+
+
+ + + + +
+ + +
+ + +
+
Preview & Order (Drag to reorder)
+
+ +
+
+ + + +
+
+
+
+ + + + + diff --git a/templates/indexv2.html b/templates/indexv2.html new file mode 100644 index 0000000000000000000000000000000000000000..f77780be699d582339ab6bfddd9834c1c1db54b6 --- /dev/null +++ b/templates/indexv2.html @@ -0,0 +1,469 @@ +{% extends "base.html" %} + +{% block title %}Upload PDF{% endblock %} + +{% block head %} + + + +{% endblock %} + +{% block content %} +
+
+ + +
+
+
+ +
+

Analyze PDF

+

Upload a document to extract questions

+
+
+ +
+ + + + +
+
+ + +
+
+ Tap to select or drag PDF here + Choose File + +
+
+ + +
+
+ + +
Google Drive sharing links supported
+
+
+ + +
+
+ + +
+
+
+ + + +
+ +
+ + +
+
+ + Upload Images Instead + + + +
+ +
+
+
+ {% if pdfs %} + {% for pdf in pdfs %} + +
{{ pdf.filename }}
+ +
+ {% endfor %} + {% else %} +
No recent files found.
+ {% endif %} +
+
+
+
+ +
+
+
+{% endblock %} + +{% block scripts %} + +{% endblock %} diff --git a/templates/json_upload.html b/templates/json_upload.html new file mode 100644 index 0000000000000000000000000000000000000000..eabf6e0ac50ca633f86ce540c55eae0d8ef71a06 --- /dev/null +++ b/templates/json_upload.html @@ -0,0 +1,215 @@ +{% extends "base.html" %} + +{% block title %}Upload JSON{% endblock %} + +{% block head %} + + +{% endblock %} + +{% block content %} +
+
+
+

Upload JSON Data

+
+
+
+
+ + +
+
+ +
+ + +
+
+ + +
+
+ + +
+
+ +
+
+
+
+
+{% endblock %} + +{% block scripts %} + +{% endblock %} diff --git a/templates/login.html b/templates/login.html new file mode 100644 index 0000000000000000000000000000000000000000..56d76720ec920aaa2a90a012b7ec7f99967ceb64 --- /dev/null +++ b/templates/login.html @@ -0,0 +1,47 @@ +{% extends "base.html" %} + +{% block title %}Login{% endblock %} + +{% block content %} +
+
+
+
+
+

Login

+
+
+ {% with messages = get_flashed_messages() %} + {% if messages %} + + {% endif %} + {% endwith %} +
+ +
+ + +
+
+ + +
+
+ + +
+
+ +
+
+
+ +
+
+
+
+{% endblock %} diff --git a/templates/main.html b/templates/main.html new file mode 100644 index 0000000000000000000000000000000000000000..2fea54c1deea5da1bb2078ae66ddc8b60ae46ba7 --- /dev/null +++ b/templates/main.html @@ -0,0 +1,55 @@ +{% extends "base.html" %} + +{% block title %}DocuPDF - Upload Options{% endblock %} + +{% block content %} +
+
+
+
+

Choose Upload Method

+
+
+
+
+
+
+
+ +
+
PDF Upload
+

Upload a PDF document and extract questions from each page.

+ Upload PDF +
+
+
+
+
+
+
+ +
+
Image Upload
+

Upload multiple images and extract questions from each image.

+ Upload Images +
+
+
+
+
+
+
+ +
+
JSON Upload
+

Create questions from a JSON formatted text.

+ Upload JSON +
+
+
+
+
+
+
+
+{% endblock %} \ No newline at end of file diff --git a/templates/neetprep.html b/templates/neetprep.html new file mode 100644 index 0000000000000000000000000000000000000000..62f19a61f56edab09d93fabe53d2167216523f2f --- /dev/null +++ b/templates/neetprep.html @@ -0,0 +1,284 @@ +{% extends "base.html" %} + +{% block title %}NeetPrep Incorrect Questions{% endblock %} + +{% block head %} + +{% endblock %} + +{% block content %} +
+
+
+

NeetPrep Incorrect Question Manager

+
+
+ {% if neetprep_enabled %} +
+ +
+ + +
+
+
+ + ({{ unclassified_count }} unclassified) +
+ {% else %} + + {% endif %} + +
+ + +
+ +

Filter by Subject

+
+
+
+ +
+
+
+ +

Generate PDF

+
+
+ {% if neetprep_enabled %} + + Edit Synced Questions + {% endif %} + + Edit Classified Questions +
+ +
Or, Generate by Topic:
+
+ + + + + + + {% if neetprep_enabled %}{% endif %} + + + + + {% if topics %} + {% for item in topics %} + + + + + {% if neetprep_enabled %}{% endif %} + + + {% endfor %} + {% else %} + + + + {% endif %} + +
S.No.TopicNeetPrep QuestionsMy Questions
{{ loop.index }}{{ item.topic }}{{ item.neetprep_count }}{{ item.my_questions_count }}
No topics found. {% if neetprep_enabled %}Sync with NeetPrep to fetch questions and topics.{% else %}No classified topics found.{% endif %}
+
+ +
+
+
+
+{% endblock %} + +{% block scripts %} + +{% endblock %} \ No newline at end of file diff --git a/templates/neetprep_edit.html b/templates/neetprep_edit.html new file mode 100644 index 0000000000000000000000000000000000000000..ae037eb1eeead3e52bd0bb8427eaf1e3d3d2bc69 --- /dev/null +++ b/templates/neetprep_edit.html @@ -0,0 +1,172 @@ +{% extends "base.html" %} + +{% block title %}Edit NeetPrep Questions{% endblock %} + +{% block content %} +
+
+
+

Edit NeetPrep Questions

+ Back to NeetPrep Home +
+
+ +
+
+ +
+
+ +
+
+ + +
+ + + + + + + + + + + {% for q in questions %} + + + + + + + {% endfor %} + +
Question TextTopicSubjectAction
{{ q.question_text_plain }}{{ q.topic }}{{ q.subject }} + +
+
+
+
+
+ + + +{% endblock %} + +{% block scripts %} + +{% endblock %} diff --git a/templates/pdf_manager.html b/templates/pdf_manager.html new file mode 100644 index 0000000000000000000000000000000000000000..3779227d0c3e21de941247e376ecabc19ae4aed8 --- /dev/null +++ b/templates/pdf_manager.html @@ -0,0 +1,840 @@ +{% extends "base.html" %} + +{% block title %}PDF Manager - DocuPDF{% endblock %} + +{% block styles %} + +{% endblock %} + +{% block content %} +
+ +
+

PDF Manager

+
+ {% if all_view %} + Show Folders + {% else %} + Show All PDFs + {% endif %} + + Upload + +
+
+ + +
+
+ + +
+ + + + + + + + +
+ + {% if not all_view %} + + + {% endif %} + +
+
+ {% if not all_view %} + + {% for folder in subfolders %} +
+
+
+ +
{{ folder.name }}
+

{{ folder.created_at.strftime('%Y-%m-%d %I:%M %p') }}

+
+ +
+
+ {% endfor %} + {% endif %} + + + {% for pdf in pdfs %} +
+
+ +
+

+ Subject: {{ pdf.subject or 'No subject' }} + + + +

+

+ {% if pdf.tags %} + {% for tag in pdf.tags.split(',') %} + {{ tag.strip() }} + {% endfor %} + {% endif %} +

+
+ +
+
+ {% endfor %} +
+
+ + + + + + + + + + + + + +{% endblock %} + +{% block scripts %} + +{% endblock %} diff --git a/templates/pdfjs_viewer.html b/templates/pdfjs_viewer.html new file mode 100644 index 0000000000000000000000000000000000000000..58d537906017d2ec6f6f1f21eda1df1c0fd350b8 --- /dev/null +++ b/templates/pdfjs_viewer.html @@ -0,0 +1,935 @@ + + + + + + {{ pdf_title }} + + + + + + + +
+ +
+
+
+ + +
+ +
+
+ +
+ + + + of -- +
+ +
+ + + +
+ +
+ + +
+
+ + +
+ + +
+
+
+
+
+ + +
+ +
+ + + + + + + diff --git a/templates/processed_json.html b/templates/processed_json.html new file mode 100644 index 0000000000000000000000000000000000000000..db312d8ab36488e35676dfd9846ce01aba4ddb2b --- /dev/null +++ b/templates/processed_json.html @@ -0,0 +1,31 @@ +{% extends 'base.html' %} + +{% block content %} +
+

Processed Quiz Data

+
+ + + + + + + + + + + + {% for q in questions %} + + + + + + + {% endfor %} + +
QuestionYour AnswerCorrect AnswerStatus
{{ q.question | safe }}{{ q.yourAnswer | safe }}{{ q.correctAnswer | safe }}{{ q.status }}
+ +
+
+{% endblock %} diff --git a/templates/qtab_exam.html b/templates/qtab_exam.html new file mode 100644 index 0000000000000000000000000000000000000000..ebe4b3f05ac5a0156084116e5548d957069e4e58 --- /dev/null +++ b/templates/qtab_exam.html @@ -0,0 +1,1522 @@ +{% extends "base.html" %} + +{% block title %}Exam Mode - Question Table{% endblock %} + +{% block head %} + + +{% endblock %} + +{% block content %} +
+ +
+
+
+ + + + + +
+ + + + + +
+
+
+ Loading... +
+
+
+ + + + + +
+ +
+
+ +
+
+ + + +
+ + +
+
+
+
+
Settings
+ +
+ +
+
Display
+
+ + +
+
+ + +
+
+ + +
+
+ +
+
Timer
+
+ + +
+
+ + +
+
+ +
+
Navigation
+
+ + +
+
+ + +
+
+ +
+
Quick Actions
+ + +
+
+
+ + + + + + + + + + + +{% endblock %} + diff --git a/templates/qtab_list.html b/templates/qtab_list.html new file mode 100644 index 0000000000000000000000000000000000000000..794231750322c964978fbcba8b94f39ee45e42f4 --- /dev/null +++ b/templates/qtab_list.html @@ -0,0 +1,702 @@ +{% extends "base.html" %} + +{% block title %}Question Table Generator{% endblock %} + +{% block head %} + +{% endblock %} + +{% block content %} +
+ +
+
+

Question Table Generator

+ +
+
+ + + +
+
+ + + {% with messages = get_flashed_messages(with_categories=true) %} + {% if messages %} + {% for category, message in messages %} + + {% endfor %} + {% endif %} + {% endwith %} + + +
+ +

Upload Image for Question Extraction

+

Drag and drop an image here, or click to select

+ + +
+ Ctrl+V to paste image from clipboard +
+
+ Processing... +
+
+ + + {% if subfolders %} +

Folders

+
+ {% for folder in subfolders %} +
+ +
{{ folder.name }}
+
+ {% endfor %} +
+ {% endif %} + + + {% if images %} +

Processed Images

+
+ {% for image in images %} +
+ + {{ image.original_name }} +
+
+
{{ image.original_name }}
+ +
+ {{ image.created_at }} + + {% if image.result_json %} +
+
+
+ + {% endif %} + +
+ + Exam Mode + + +
+
+
+ {% endfor %} +
+ {% elif not subfolders %} +
+ No images uploaded yet. Upload an image to get started! +
+ {% endif %} +
+ + + + + + + + + + + +{% endblock %} diff --git a/templates/question_entry.html b/templates/question_entry.html new file mode 100644 index 0000000000000000000000000000000000000000..d97fbf1cfad88237470a448fd7a423a44139994e --- /dev/null +++ b/templates/question_entry.html @@ -0,0 +1,198 @@ + + + + + + Enter Question Details + + + +
+
+
+

Step 3: Enter Question Details

+
+
+
+ +
+ + +
+ + {% for image in images %} +
+
+ +
+
+
{{ image.original_name }}
+
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+ +
+ +
+ +
+ +
+
+
+
+
+
+ {% endfor %} + +
+ +

Generate PDF

+
+
+ + +
+
+ + +
+
+ + +
+
+ +
+
+
+
+
+ + + + + diff --git a/templates/question_entry_v2.html b/templates/question_entry_v2.html new file mode 100644 index 0000000000000000000000000000000000000000..ec82f06e0d978800bf15aa9b478c9c5bab57097f --- /dev/null +++ b/templates/question_entry_v2.html @@ -0,0 +1,911 @@ +{% extends "base.html" %} + +{% block title %}Enter Question Details (V2){% endblock %} + +{% block head %} + +{% endblock %} + +{% block content %} +
+
+
+
+

Step 3: Enter Question Details

+
Shortcuts: Shift+Q for Next | Shift+C (Correct) | Shift+W (Wrong) | Shift+U (Unattempted)
+ {% if classified_count is defined and total_questions is defined %} +
+ + Classification: + {{ classified_count }}/{{ total_questions }} questions + +
+ {% endif %} +
+ +
+
+
+ Upload Answer Key (Optional) +
+ + +
+
+ +
+ + + {% if not nvidia_nim_available %} + + {% else %} +
+ + +
+ {% endif %} + + {% for image in images %} +
+ + Question {{ loop.index }} + + +
+ Cropped Question {{ loop.index }} +
+
+
+
+ + {% if nvidia_nim_available %} +
+ + +
+ {% else %} + + {% endif %} +
+
+ + +
+
+ + +
+
+ + +
+ + + +
+
+
+
+

Subject: {{ image.subject or 'N/A' }}

+
+
+

Chapter: {{ image.chapter or 'N/A' }}

+
+
+
+ {% endfor %} + +
+ Generate PDF +
+
+ + +
+
+ + +
+
+
+
+ + +
+
+ + +
+
+
+ Layout Options +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+
+
+
+ + +
+
+ + +
+
+ + +
+
+
+
+ +
+ + +
+
+ + + + +
+
+

+ +

+
+
+
+
+
+
+
+
+
+
+
+ +
+
+

Added Miscellaneous Questions:

+
+
+
+
+
+ +
+
+
+
+{% endblock %} + +{% block scripts %} + +{% endblock %} \ No newline at end of file diff --git a/templates/quiz.html b/templates/quiz.html new file mode 100644 index 0000000000000000000000000000000000000000..671660220eefb950698ceed2e38cb1e9d16ebac3 --- /dev/null +++ b/templates/quiz.html @@ -0,0 +1,92 @@ +{% block head %} + +{% endblock %} diff --git a/templates/quiz_v2.html b/templates/quiz_v2.html new file mode 100644 index 0000000000000000000000000000000000000000..71b45cea28084b1c454be7a39c8b9868342c51ae --- /dev/null +++ b/templates/quiz_v2.html @@ -0,0 +1,236 @@ +{% extends "base.html" %} + +{% block title %}Quiz Mode{% endblock %} + +{% block head %} + +{% endblock %} + +{% block content %} +
+ +
+
+
+ + +
+ Question +
+
+
+ + +
+
Details
+
+
+ + +
+ +
+ +
+ + +
+ +
+
+ + +
+ +
+
+
+ + +{% endblock %} + +{% block scripts %} + +{% endblock %} diff --git a/templates/redact_status.html b/templates/redact_status.html new file mode 100644 index 0000000000000000000000000000000000000000..90b695f17c2ae78906924edbf8a394089a5d1761 --- /dev/null +++ b/templates/redact_status.html @@ -0,0 +1,89 @@ +{% extends "base.html" %} + +{% block title %}Redacting Document{% endblock %} + +{% block content %} +
+
+
+

Redaction in Progress

+
+
+

Your document is being automatically redacted. Please wait, this may take a few minutes depending on the document size.

+
+
+
+
+
Initializing...
+
+ +
+
+
+{% endblock %} + +{% block scripts %} + +{% endblock %} diff --git a/templates/register.html b/templates/register.html new file mode 100644 index 0000000000000000000000000000000000000000..ea742cade84e04bd52638096d24730f6c81b9263 --- /dev/null +++ b/templates/register.html @@ -0,0 +1,46 @@ +{% extends "base.html" %} + +{% block title %}Register{% endblock %} + +{% block content %} +
+
+
+
+
+

Create Account

+
+
+ {% with messages = get_flashed_messages() %} + {% if messages %} + + {% endif %} + {% endwith %} +
+
+ + +
+
+ + +
+
+ + +
+
+ +
+
+
+ +
+
+
+
+{% endblock %} diff --git a/templates/reorder_modal.html b/templates/reorder_modal.html new file mode 100644 index 0000000000000000000000000000000000000000..e08098b1c2e684beaf4dd07b4473132df02218d9 --- /dev/null +++ b/templates/reorder_modal.html @@ -0,0 +1,85 @@ + + + + + + + + diff --git a/templates/resize.html b/templates/resize.html new file mode 100644 index 0000000000000000000000000000000000000000..fd656139c56a8e140c6de1ca070c9e28a258c8b1 --- /dev/null +++ b/templates/resize.html @@ -0,0 +1,1143 @@ +{% extends "base.html" %} + +{% block title %}Resize PDF - DocuPDF{% endblock %} + +{% block styles %} + +{% endblock %} + +{% block content %} +
+
+
+

Resize PDF for Notes

+
+ +
+
+ +
+ +
+
+
Select PDF File
+
+ + + + + + + + +
+ {% if not subfolders and not pdfs %} +
+ +

No files found

+
+ {% else %} + {% for folder in subfolders %} +
+ + {{ folder.name }} + +
+ {% endfor %} + + {% for pdf in pdfs %} +
+ + {{ pdf.filename }} + {% if pdf.persist %} + + {% endif %} +
+ {% endfor %} + {% endif %} +
+ + +
+ + +
+
+
Options
+
+ +
+ + + + +
+ + +
+ + +
+ Processing Mode + + + + + + + + + +
+ + +
+ +
+ + +
+ Background Color +
+
+
+
+
+
+ +
+ +
+ + +
+ Background Pattern +
+ + + +
+
+
+ + +
+ +
+
+
+
+
+
+
+ + + +{% endblock %} + +{% block scripts %} + +{% endblock %} diff --git a/templates/settings.html b/templates/settings.html new file mode 100644 index 0000000000000000000000000000000000000000..1da981a8c40aa5bf02fee2ab8e4902017a60d55d --- /dev/null +++ b/templates/settings.html @@ -0,0 +1,106 @@ +{% extends "base.html" %} + +{% block title %}Settings{% endblock %} + +{% block content %} +
+
+
+
+
+

Application Settings

+
+
+ {% with messages = get_flashed_messages(with_categories=true) %} + {% if messages %} + {% for category, message in messages %} + + {% endfor %} + {% endif %} + {% endwith %} + +
+
+ Google Drive Integration +
+ +
+ + +
+
+ Status: + {% if client_secret_exists %} + Configured + {% else %} + Not Found + {% endif %} +
+ Upload the client_secret.json file from Google Cloud Console to enable Drive integration. +
+
+ Required Redirect URI:
+ {{ drive_redirect_uri }} +
+ Ensure this URI is exactly listed in your Google Cloud Console "Authorized redirect URIs". +
+
+
+
+
+ NeetPrep Settings +
+ + +
+ Enable synchronization with NeetPrep for question data. +
+
+
+
+
+ Classifier Settings +
+ + +
+ Choose the AI model for automatic question classification. Gemini uses Google's Gemini API, while Nova uses Amazon's Nova model via OpenRouter. Requires relevant API keys to be set. +
+
+
+
+
+ PDF & Image Settings +
+ + +
+ Set the resolution for converting PDF pages to images. Higher values (e.g., 300) are better for quality and OCR but create larger files. Lower values (e.g., 150) are faster. Default: 300, Max: 900. +
+
+
+ + +
+ Set the output resolution (DPI) for processed images from the Color Removal tool when generating PDFs. Default: 200, Max: 600. +
+
+
+
+ +
+
+
+
+
+
+{% endblock %} diff --git a/templates/simple_viewer.html b/templates/simple_viewer.html new file mode 100644 index 0000000000000000000000000000000000000000..53adc572a2492be0eab79e947abe7bb997f833ff --- /dev/null +++ b/templates/simple_viewer.html @@ -0,0 +1,112 @@ + + + + + + {{ pdf_title }} + + + +
+
+

{{ pdf_title }}

+
+
+ PDF Page +
+ +
+ + + + diff --git a/templates/subjective_generator.html b/templates/subjective_generator.html new file mode 100644 index 0000000000000000000000000000000000000000..ce435d6f97ae9544aebce1ebf109c0186c797d49 --- /dev/null +++ b/templates/subjective_generator.html @@ -0,0 +1,303 @@ +{% extends "base.html" %} + +{% block title %}Subjective Question Generator{% endblock %} + +{% block head %} + + +{% endblock %} + +{% block content %} +
+
+ + +
+
+
+ +
+
+
+

Subjective Generator

+

AI will transcribe and format your handwritten or printed questions.

+
+
+ +
+ + {% include 'camera_receiver_component.html' %} + +
+ + +
+
+ + +
+
+ + Preview + + +
+ +
Tap to upload image
+
Supports JPG, PNG (Max 10MB)
+
+ + +
+
+ + +
+
+

+ +

+
+
+

If Gemini is unavailable, paste the raw JSON response here.

+ +
+
+
+
+ + + + +
+
+
+
+{% endblock %} + +{% block scripts %} + +{% endblock %} diff --git a/templates/subjective_list.html b/templates/subjective_list.html new file mode 100644 index 0000000000000000000000000000000000000000..b118c7218a1c14692249ad428b2e53b9755950e2 --- /dev/null +++ b/templates/subjective_list.html @@ -0,0 +1,835 @@ +{% extends "base.html" %} + +{% block title %}Subjective Questions Manager{% endblock %} + +{% block head %} + + + + + + +{% endblock %} + +{% block content %} +
+ + +
+
+

Subjective Questions

+ +
+
+ + + + + + Print PDF + + + Generator + +
+
+ + + {% with messages = get_flashed_messages(with_categories=true) %} + {% if messages %} + {% for category, message in messages %} + + {% endfor %} + {% endif %} + {% endwith %} + + + {% if subfolders %} +
+ {% for folder in subfolders %} + + {% endfor %} +
+ {% endif %} + + + {% if grouped_questions %} + {% for topic, questions_list in grouped_questions.items() %} +
+
+
{{ topic }}
+
+ + + + {{ questions_list|length }} Questions +
+ +
+
+
+
+
+ + + + + + + + + + + {% for q in questions_list %} + + + + + + + {% endfor %} + +
#QuestionActions
{{ q.question_number_within_topic }}
{{ q.question_html | safe }}
+ + +
+
+
+
+ {% endfor %} + {% elif not subfolders %} +
+

No Content

+

This folder is empty. Generate new questions or create subfolders.

+ Generate Questions +
+ {% endif %} +
+ + + + + + + +{% include 'reorder_modal.html' %} + + + + + + + + +{% endblock %} diff --git a/templates/subjective_list_backup.html b/templates/subjective_list_backup.html new file mode 100644 index 0000000000000000000000000000000000000000..b118c7218a1c14692249ad428b2e53b9755950e2 --- /dev/null +++ b/templates/subjective_list_backup.html @@ -0,0 +1,835 @@ +{% extends "base.html" %} + +{% block title %}Subjective Questions Manager{% endblock %} + +{% block head %} + + + + + + +{% endblock %} + +{% block content %} +
+ + +
+
+

Subjective Questions

+ +
+
+ + + + + + Print PDF + + + Generator + +
+
+ + + {% with messages = get_flashed_messages(with_categories=true) %} + {% if messages %} + {% for category, message in messages %} + + {% endfor %} + {% endif %} + {% endwith %} + + + {% if subfolders %} +
+ {% for folder in subfolders %} + + {% endfor %} +
+ {% endif %} + + + {% if grouped_questions %} + {% for topic, questions_list in grouped_questions.items() %} +
+
+
{{ topic }}
+
+ + + + {{ questions_list|length }} Questions +
+ +
+
+
+
+
+ + + + + + + + + + + {% for q in questions_list %} + + + + + + + {% endfor %} + +
#QuestionActions
{{ q.question_number_within_topic }}
{{ q.question_html | safe }}
+ + +
+
+
+
+ {% endfor %} + {% elif not subfolders %} +
+

No Content

+

This folder is empty. Generate new questions or create subfolders.

+ Generate Questions +
+ {% endif %} +
+ + + + + + + +{% include 'reorder_modal.html' %} + + + + + + + + +{% endblock %} diff --git a/templates/subjective_print.html b/templates/subjective_print.html new file mode 100644 index 0000000000000000000000000000000000000000..96540409f154441e6b7b926ed382cb5054f18bfc --- /dev/null +++ b/templates/subjective_print.html @@ -0,0 +1,140 @@ + + + + + + Subjective Questions Print View + + + + +
+
+

Questions Preview

+
+ + +
+
+ + {% if grouped_questions %} + {% for topic, questions in grouped_questions.items() %} +
+

{{ topic }}

+ {% for q in questions %} +
+
+
Q{{ q.question_number_within_topic }}.
+
+ {{ q.question_html | safe }} +
+
+
+ {% endfor %} +
+ {% endfor %} + {% else %} +

No questions found in this folder.

+ {% endif %} +
+ + + + \ No newline at end of file diff --git a/templates/subjective_results.html b/templates/subjective_results.html new file mode 100644 index 0000000000000000000000000000000000000000..55e9d01336f4f08235dfbc1764ad28a70d9ef7dc --- /dev/null +++ b/templates/subjective_results.html @@ -0,0 +1,123 @@ +{% extends "base.html" %} + +{% block title %}Generated Questions{% endblock %} + +{% block content %} +
+
+

Generated Questions for "{{ topic }}"

+
+ Back + +
+
+ +
+ {% for topic_name, questions_list in grouped_questions.items() %} +
+

{{ topic_name }}

+
+ + + + + + + + + + {% for q in questions_list %} + + + + + + {% endfor %} + +
#QuestionActions
{{ q.question_number_within_topic }}
+
+
+ {% endfor %} +
+
+ + +{% endblock %} diff --git a/templates/templates/_header.html b/templates/templates/_header.html new file mode 100644 index 0000000000000000000000000000000000000000..67d1745188ef3553cc1ca7221bcb9c3e9211dbfd --- /dev/null +++ b/templates/templates/_header.html @@ -0,0 +1,9 @@ +
+

+ + {{ title | default('DocuPDF') }} +

+
+ {% block header_actions %}{% endblock %} +
+
\ No newline at end of file diff --git a/templates/templates/_nav_links.html b/templates/templates/_nav_links.html new file mode 100644 index 0000000000000000000000000000000000000000..2ee1a531e967c95497f4363030af409bb635abcf --- /dev/null +++ b/templates/templates/_nav_links.html @@ -0,0 +1,4 @@ + diff --git a/templates/templates/_navbar.html b/templates/templates/_navbar.html new file mode 100644 index 0000000000000000000000000000000000000000..833ac5c041e3f3950b04fc1c64937783823ec326 --- /dev/null +++ b/templates/templates/_navbar.html @@ -0,0 +1,6 @@ + diff --git a/templates/templates/_scripts.html b/templates/templates/_scripts.html new file mode 100644 index 0000000000000000000000000000000000000000..063d02b34686a8cc46453bd166c464dcd425d4b1 --- /dev/null +++ b/templates/templates/_scripts.html @@ -0,0 +1 @@ +{# Common scripts can be placed here #} \ No newline at end of file diff --git a/templates/templates/base.html b/templates/templates/base.html new file mode 100644 index 0000000000000000000000000000000000000000..b0150e25c97a21b12b7fb86b992f58a3439ce566 --- /dev/null +++ b/templates/templates/base.html @@ -0,0 +1,18 @@ + + + + + + {% block title %}DocuPDF{% endblock %} + {% block styles %}{% endblock %} + + + {% block head %}{% endblock %} + + + {% include '_navbar.html' %} + {% block content %}{% endblock %} + + {% block scripts %}{% endblock %} + + \ No newline at end of file diff --git a/templates/templates/crop.html b/templates/templates/crop.html new file mode 100644 index 0000000000000000000000000000000000000000..1028d75ecefe21e17d66f05fb5c2056a6721e6cc --- /dev/null +++ b/templates/templates/crop.html @@ -0,0 +1,346 @@ + + + + + + Adjust & Enhance + + + + + +
+
Adjust & Enhance
+
+
+
+ Image to adjust + +
+
+
+
+
Quick Presets
+
+
+
Manual Adjustments
+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + + diff --git a/templates/templates/cropv2.html b/templates/templates/cropv2.html new file mode 100644 index 0000000000000000000000000000000000000000..a0020f210ebb8eefe462d9f8e141ba32b73e3532 --- /dev/null +++ b/templates/templates/cropv2.html @@ -0,0 +1,376 @@ + + + + + + Step 2: Draw Boxes ({{ image_index + 1 }} / {{ total_pages }}) + + + + + + {% include '_navbar.html' %} +
+
+ Loading... +
+
+
+
+

Page {{ image_index + 1 }} of {{ total_pages }}

+
+ + + +
+
+
+
+
+ PDF Page + +
+ + + +
+
+
+
+
+
Click and drag to draw boxes. Click a box to select it. Drag corners to resize.

+
Page Adjustments
+
+
+
+
+
+
+
+
+ + + + \ No newline at end of file diff --git a/templates/templates/cropv2.html.bak b/templates/templates/cropv2.html.bak new file mode 100644 index 0000000000000000000000000000000000000000..29ee690fd7da6625ae9d4603ae09b0c1ab639728 --- /dev/null +++ b/templates/templates/cropv2.html.bak @@ -0,0 +1,511 @@ +{% extends "base.html" %} + +{% block title %}Step 2: Draw Boxes ({{ image_index + 1 }} / {{ total_pages }}){% endblock %} + +{% block head %} + +{% endblock %} + +{% block content %} +
+
+ Loading... +
+
+
+
+
+

Page {{ image_index + 1 }} of {{ total_pages }}

+ +
+
+ + + + +
+
+
+
+ Progress: {{ image_index + 1 }} of {{ total_pages }} pages + {{ total_pages - image_index - 1 }} pages left +
+
+
+
+
+
+
+
+ PDF Page + +
+ + + +
+
+
+
+
+
Click and drag to draw boxes. Click a box to select it. Drag corners to resize.

+
Page Adjustments
+
+
+
+
+
+
+
+
+
+ {% for i in range(total_pages) %} + {{ i + 1 }} + {% endfor %} +
+
+
+{% endblock %} + +{% block scripts %} + +{% endblock %} \ No newline at end of file diff --git a/templates/templates/dashboard.html b/templates/templates/dashboard.html new file mode 100644 index 0000000000000000000000000000000000000000..f97d239ea96f4d1be11b30c71d4db5372dbeeb7f --- /dev/null +++ b/templates/templates/dashboard.html @@ -0,0 +1,96 @@ +{% extends "base.html" %} + +{% block title %}Dashboard - DocuPDF{% endblock %} + +{% block content %} +
+
+
+

Sessions

+
+
+ {% if sessions %} +
+ + + + + + + + + + + + {% for session in sessions %} + + + + + + + + {% endfor %} + +
File NameCreated AtPages/ImagesQuestionsActions
{{ session.pdf_name }}{{ session.created_at }}{{ session.page_count }}{{ session.question_count }} +
+ {% if session.page_count > 0 %} + Edit Files + {% endif %} + {% if session.question_count > 0 %} + Edit Questions + {% endif %} + + +
+
+
+ {% else %} +

No sessions found.

+ {% endif %} +
+
+
+{% endblock %} + +{% block scripts %} + +{% endblock %} \ No newline at end of file diff --git a/templates/templates/image_upload.html b/templates/templates/image_upload.html new file mode 100644 index 0000000000000000000000000000000000000000..aac7974ef0d5fa839eef933977c365c97e2400bf --- /dev/null +++ b/templates/templates/image_upload.html @@ -0,0 +1,72 @@ +{% extends "base.html" %} + +{% block title %}Upload Images for Analysis{% endblock %} + +{% block head %} + +{% endblock %} + +{% block content %} +
+
+
+

Image Question Extractor

+
+
+
+
+ + +
You can select multiple image files (PNG, JPG, JPEG, GIF, BMP)
+
+ +
+
+
+
+
+{% endblock %} + +{% block scripts %} + +{% endblock %} \ No newline at end of file diff --git a/templates/templates/index.html b/templates/templates/index.html new file mode 100644 index 0000000000000000000000000000000000000000..9252e02ab25c9fd15d6ec779e6efa573ac8a8f7c --- /dev/null +++ b/templates/templates/index.html @@ -0,0 +1,258 @@ + + + + + + DocuPDF - Upload + + + + + + +
+
+
+

Step 1: Select Your Questions

+
+
+ + + + +
+ + +
+ + +
+
Preview & Order (Drag to reorder)
+
+ +
+
+ + + +
+
+
+
+ + + + + diff --git a/templates/templates/indexv2.html b/templates/templates/indexv2.html new file mode 100644 index 0000000000000000000000000000000000000000..3ad966b94b4f6a9ef820a57bce40d5e065dc4f9f --- /dev/null +++ b/templates/templates/indexv2.html @@ -0,0 +1,75 @@ +{% extends "base.html" %} + +{% block title %}Upload PDF for Analysis{% endblock %} + +{% block head %} + +{% endblock %} + +{% block content %} +
+
+
+

PDF Question Extractor

+
+
+
+
+ + +
+ +
+ +
+
+
+
+{% endblock %} + +{% block scripts %} + +{% endblock %} \ No newline at end of file diff --git a/templates/templates/main.html b/templates/templates/main.html new file mode 100644 index 0000000000000000000000000000000000000000..0e4bac4c45bd2a830334953a72bdaf984b085e1b --- /dev/null +++ b/templates/templates/main.html @@ -0,0 +1,50 @@ +{% extends "base.html" %} + +{% block title %}DocuPDF - Upload Options{% endblock %} + +{% block head %} + +{% endblock %} + +{% block content %} +
+
+
+

Choose Upload Method

+
+
+
+
+
+
+
+ +
+
PDF Upload
+

Upload a PDF document and extract questions from each page.

+ Upload PDF +
+
+
+
+
+
+
+ +
+
Image Upload
+

Upload multiple images and extract questions from each image.

+ Upload Images +
+
+
+
+
+
+
+{% endblock %} \ No newline at end of file diff --git a/templates/templates/pdf_manager.html b/templates/templates/pdf_manager.html new file mode 100644 index 0000000000000000000000000000000000000000..4a6f7775bdc2918f1b740f81b9bfb53b9d722410 --- /dev/null +++ b/templates/templates/pdf_manager.html @@ -0,0 +1,299 @@ +{% extends "base.html" %} + +{% block title %}PDF Manager - DocuPDF{% endblock %} + +{% block styles %} + +{% endblock %} + +{% block content %} +
+
+

PDF Manager

+
+ Show All PDFs + + New Upload +
+
+ + {% if not all_view %} + + + {% endif %} + + +
+ + + + +
+ +
+
+

Files

+
+
+
+ + + + + + + + + + + + {% if not all_view %} + + {% for folder in subfolders %} + + + + + + + + {% endfor %} + {% endif %} + + + {% for pdf in pdfs %} + + + + + + + + {% endfor %} + +
NameSubjectTagsCreated At
{{ folder }}
+ {% if pdf.persist %}{% endif %} + {{ pdf.filename }} + {{ pdf.subject }}{{ pdf.tags }}{{ pdf.created_at }}
+
+
+
+
+ + + + + + +{% endblock %} + +{% block scripts %} + +{% endblock %} \ No newline at end of file diff --git a/templates/templates/question_entry.html b/templates/templates/question_entry.html new file mode 100644 index 0000000000000000000000000000000000000000..d97fbf1cfad88237470a448fd7a423a44139994e --- /dev/null +++ b/templates/templates/question_entry.html @@ -0,0 +1,198 @@ + + + + + + Enter Question Details + + + +
+
+
+

Step 3: Enter Question Details

+
+
+
+ +
+ + +
+ + {% for image in images %} +
+
+ +
+
+
{{ image.original_name }}
+
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+ +
+ +
+ +
+ +
+
+
+
+
+
+ {% endfor %} + +
+ +

Generate PDF

+
+
+ + +
+
+ + +
+
+ + +
+
+ +
+
+
+
+
+ + + + + diff --git a/templates/templates/question_entry_v2.html b/templates/templates/question_entry_v2.html new file mode 100644 index 0000000000000000000000000000000000000000..cc90cd2f1a0a52e6725753caba2271493642b4fa --- /dev/null +++ b/templates/templates/question_entry_v2.html @@ -0,0 +1,598 @@ +{% extends "base.html" %} + +{% block title %}Enter Question Details (V2){% endblock %} + +{% block head %} + +{% endblock %} + +{% block content %} +
+
+
+
+

Step 3: Enter Question Details

+
Shortcuts: Shift+Q for Next | Shift+C (Correct) | Shift+W (Wrong) | Shift+U (Unattempted)
+
+
+ Go Back to Cropping + {% include '_nav_links.html' %} +
+
+
+
+ Upload Answer Key (Optional) +
+ + +
+
+ +
+ + + {% if not nvidia_nim_available %} + + {% else %} +
+ +
+ {% endif %} + + {% for image in images %} +
+ + Question {{ loop.index }} + + +
+ Cropped Question {{ loop.index }} +
+
+
+
+ + {% if nvidia_nim_available %} +
+ + +
+ {% else %} + + {% endif %} +
+
+ + +
+
+ + +
+
+ + +
+ + + +
+
+
+
+
+ {% endfor %} + +
+ Generate PDF +
+
+ + +
+
+ + +
+
+ + +
+
+
+
+
+
+
+
+
+ + +
+ +
+
+

+ +

+
+
+
+
+
+
+
+
+
+
+
+ +
+
+

Added Miscellaneous Questions:

+
+
+
+
+
+ +
+
+
+
+{% endblock %} + +{% block scripts %} + +{% endblock %} \ No newline at end of file diff --git a/templates/upload_final_pdf.html b/templates/upload_final_pdf.html new file mode 100644 index 0000000000000000000000000000000000000000..b58872b4c7ab5423ad4f70537b2aba3515050061 --- /dev/null +++ b/templates/upload_final_pdf.html @@ -0,0 +1,434 @@ +{% extends "base.html" %} + +{% block title %}Upload Final PDF{% endblock %} + +{% block head %} + + + +{% endblock %} + +{% block content %} +
+
+ + +
+
+
+ +
+

Upload Final PDF

+

Add a finished report to your collection

+
+
+ +
+ +
+ +
+ + +
+ + + + +
+ + +
+
+ Tap to select or drag PDF here + Choose File + +
+
+ + +
+
+ + +
Google Drive sharing links supported
+
+
+ + +
+
+ + +
+
+
+ + + +
+ +
+ + + +
+
+
+{% endblock %} + +{% block scripts %} + +{% endblock %} \ No newline at end of file diff --git a/templates/users.html b/templates/users.html new file mode 100644 index 0000000000000000000000000000000000000000..f992421ad7ee6ec8c4e262c8430e63b8fa93029d --- /dev/null +++ b/templates/users.html @@ -0,0 +1,50 @@ + +{% extends "base.html" %} + +{% block title %}Manage Users{% endblock %} + +{% block content %} +
+

Manage Users

+ + {% with messages = get_flashed_messages(with_categories=true) %} + {% if messages %} + {% for category, message in messages %} + + {% endfor %} + {% endif %} + {% endwith %} + +
+
Create New User
+
+
+
+ + +
+ +
+
+
+ +
+
Existing Users
+
+
    + {% for user in users %} +
  • + {{ user.username }} + Switch to this user +
  • + {% else %} +
  • No users found. Create one above!
  • + {% endfor %} +
+
+
+
+{% endblock %} diff --git a/templates/view_pdf_legacy.html b/templates/view_pdf_legacy.html new file mode 100644 index 0000000000000000000000000000000000000000..d0ec60bd716741e8fc11a3656e3f902b3e9dad8b --- /dev/null +++ b/templates/view_pdf_legacy.html @@ -0,0 +1,57 @@ +{% extends "base.html" %} + +{% block title %}{{ pdf_title }} - Legacy View{% endblock %} + +{% block styles %} + + +{% endblock %} + +{% block content %} +
+
+

{{ pdf_title }} (Legacy View)

+ Back +
+ + +
+{% endblock %} + +{% block scripts %} + + +{% endblock %} diff --git a/tmp_rovodev_api_url_patch.txt b/tmp_rovodev_api_url_patch.txt new file mode 100644 index 0000000000000000000000000000000000000000..c654b6f94ddb4396791cb042e7bd7b278d385754 --- /dev/null +++ b/tmp_rovodev_api_url_patch.txt @@ -0,0 +1,38 @@ +=== PATCH FOR ../pwdlv3/report.pwdl.py === + +1. Add new argument after line 200 (after the --force argument): + + parser.add_argument('--use_new_api', action='store_true', help='Use the new API URL instead of the primary (old) API URL.') + +2. Replace lines 202-207 with: + + # Primary API URL (old batch) + old_api_url = "https://api.penpencil.co/v3/test-service/tests?testType=All&testStatus=All&attemptStatus=All&batchId=678b4cf5a3a368218a2b16e7&isSubjective=false&isPurchased=true&testCategoryIds=6814be5e9467bd0a54703a94" + # Alternative API URL (new batch with additional filters) + new_api_url = "https://api.penpencil.co/v3/test-service/tests?testType=All&testStatus=All&attemptStatus=All&batchId=68d626499dfdb652ac3ea3df&isSubjective=false&categoryId=68d654f20b83f446958276c6&categorySectionId=Other_Tests&isPurchased=true&testCategoryIds=68d654f20b83f446958276c6" + + # Select API URL based on argument (default is old/primary) + selected_api_url = new_api_url if args.use_new_api else old_api_url + print(f"Using {'NEW' if args.use_new_api else 'PRIMARY (OLD)'} API URL") + + all_test_data = Endpoint( + url=selected_api_url, + headers=ScraperModule.batch_api.DEFAULT_HEADERS + ).fetch() + +3. Replace line 216 (in the --all section) - change 'new_api_url' to 'selected_api_url': + + url=selected_api_url, + +4. 
Replace line 233 (in the --test_id section) - change 'new_api_url' to 'selected_api_url': + + url=selected_api_url, + +=== USAGE === + +By default (no flag): Uses PRIMARY (old) API URL + python report.pwdl.py --all + +To use NEW API URL: Add --use_new_api flag + python report.pwdl.py --all --use_new_api + diff --git a/user_auth.py b/user_auth.py new file mode 100644 index 0000000000000000000000000000000000000000..c122223024c9c4a489ad25181c58da5e2e67d4c1 --- /dev/null +++ b/user_auth.py @@ -0,0 +1,93 @@ +import sqlite3 +from flask_login import LoginManager, UserMixin +from werkzeug.security import generate_password_hash, check_password_hash +from utils import get_db_connection + +class User(UserMixin): + """User model for Flask-Login.""" + def __init__(self, id, username, email, password_hash, neetprep_enabled, dpi, color_rm_dpi, v2_default=0, magnifier_enabled=1, google_token=None, classifier_model='gemini'): + self.id = id + self.username = username + self.email = email + self.password_hash = password_hash + self.neetprep_enabled = neetprep_enabled + self.dpi = dpi + self.color_rm_dpi = color_rm_dpi + self.v2_default = v2_default + self.magnifier_enabled = magnifier_enabled + self.google_token = google_token + self.classifier_model = classifier_model + + @staticmethod + def get(user_id): + conn = get_db_connection() + user_row = conn.execute('SELECT * FROM users WHERE id = ?', (user_id,)).fetchone() + conn.close() + if user_row: + user_data = dict(user_row) + return User( + user_data['id'], + user_data['username'], + user_data['email'], + user_data['password_hash'], + user_data['neetprep_enabled'], + user_data['dpi'], + user_data.get('color_rm_dpi', 200), + user_data.get('v2_default', 0), + user_data.get('magnifier_enabled', 1), + user_data.get('google_token'), + user_data.get('classifier_model', 'gemini') + ) + return None + + @staticmethod + def get_by_username(username): + conn = get_db_connection() + user_row = conn.execute('SELECT * FROM users WHERE username 
= ?', (username,)).fetchone() + conn.close() + if user_row: + user_data = dict(user_row) + return User( + user_data['id'], + user_data['username'], + user_data['email'], + user_data['password_hash'], + user_data['neetprep_enabled'], + user_data['dpi'], + user_data.get('color_rm_dpi', 200), + user_data.get('v2_default', 0), + user_data.get('magnifier_enabled', 1), + user_data.get('google_token'), + user_data.get('classifier_model', 'gemini') + ) + return None + + @staticmethod + def create(username, email, password): + password_hash = generate_password_hash(password) + conn = get_db_connection() + try: + cursor = conn.cursor() + cursor.execute( + 'INSERT INTO users (username, email, password_hash) VALUES (?, ?, ?)', + (username, email, password_hash) + ) + conn.commit() + new_id = cursor.lastrowid + conn.close() + return User.get(new_id) + except sqlite3.IntegrityError: + conn.close() + return None # Username or email already exists + +def setup_login_manager(app): + """Initializes and configures the Flask-Login manager.""" + login_manager = LoginManager() + login_manager.init_app(app) + # This is the route Flask-Login will redirect to if a user tries to access + # a page that requires authentication without being logged in. 
+ login_manager.login_view = 'auth.login' + + @login_manager.user_loader + def load_user(user_id): + return User.get(user_id) diff --git a/user_manager.py b/user_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..29c84ce46d6c42e00a0c63cf37f53ca8dcebf3dc --- /dev/null +++ b/user_manager.py @@ -0,0 +1,46 @@ + +from flask import Blueprint, render_template, request, redirect, url_for, flash, session, g +from database import get_db_connection + +user_manager_bp = Blueprint('user_manager', __name__) + +@user_manager_bp.route('/users') +def users_list(): + conn = get_db_connection() + users = conn.execute('SELECT id, username FROM users').fetchall() + conn.close() + return render_template('users.html', users=users) + +@user_manager_bp.route('/users/create', methods=['POST']) +def create_user(): + username = request.form.get('username') + if not username: + flash('Username cannot be empty!', 'danger') + return redirect(url_for('user_manager.users_list')) + + conn = get_db_connection() + try: + conn.execute('INSERT INTO users (username) VALUES (?)', (username,)) + conn.commit() + flash(f'User {username} created successfully!', 'success') + except Exception as e: + flash(f'Error creating user: {e}', 'danger') + finally: + conn.close() + return redirect(url_for('user_manager.users_list')) + +@user_manager_bp.route('/switch_user/') +def switch_user(username): + conn = get_db_connection() + user = conn.execute('SELECT id, username FROM users WHERE username = ?', (username,)).fetchone() + conn.close() + + if user: + session['user_id'] = user['id'] + session['username'] = user['username'] + flash(f'Switched to user {username}', 'success') + else: + flash(f'User {username} not found', 'danger') + + # Redirect to the dashboard of the switched user + return redirect(url_for('dashboard.dashboard', username=username)) diff --git a/utils.py b/utils.py new file mode 100644 index 
import os
import math
import base64
import io
import sqlite3
from PIL import Image, ImageDraw, ImageFont

DATABASE = 'database.db'


def get_db_connection():
    """Open a connection to the SQLite file named by ``DATABASE``.

    Rows are returned as ``sqlite3.Row`` objects, so columns can be read by
    name as well as by index. The caller is responsible for closing the
    connection.
    """
    conn = sqlite3.connect(DATABASE)
    conn.row_factory = sqlite3.Row
    return conn


def get_or_download_font(font_path="arial.ttf", font_size=50):
    """Return a TrueType font loaded from ``font_path`` at ``font_size``.

    If ``font_path`` does not exist, the font is downloaded first. Any
    failure (``requests`` missing, network error, unwritable path, corrupt
    font file) falls back to Pillow's built-in default font instead of
    raising.
    """
    if not os.path.exists(font_path):
        try:
            import requests
            response = requests.get(
                "https://github.com/kavin808/arial.ttf/raw/refs/heads/master/arial.ttf",
                timeout=30,
            )
            response.raise_for_status()
            # Write to a temporary name and rename afterwards: an interrupted
            # write must not leave a truncated file at font_path, because the
            # existence check above would then skip the download forever.
            tmp_path = f"{font_path}.part"
            with open(tmp_path, 'wb') as f:
                f.write(response.content)
            os.replace(tmp_path, font_path)
        except Exception:
            # Deliberate best effort: rendering continues with the default font.
            return ImageFont.load_default()
    try:
        return ImageFont.truetype(font_path, size=font_size)
    except IOError:
        return ImageFont.load_default()


def draw_dashed_line(draw, p1, p2, fill, width, dash_length, gap_length):
    """Draw a dashed line from ``p1`` to ``p2`` on ``draw``.

    Args:
        draw: Object exposing ``line(xy, fill=..., width=...)``, such as a
            Pillow ``ImageDraw`` instance.
        p1, p2: ``(x, y)`` end points.
        fill: Colour passed through to ``draw.line``.
        width: Stroke width passed through to ``draw.line``.
        dash_length: Length of each drawn segment.
        gap_length: Length of the space between segments.

    Raises:
        ValueError: If ``dash_length + gap_length`` is not positive; the
            drawing position could never advance in that case.
    """
    step = dash_length + gap_length
    if step <= 0:
        raise ValueError("dash_length + gap_length must be positive")

    dx = p2[0] - p1[0]
    dy = p2[1] - p1[1]
    length = (dx**2 + dy**2)**0.5
    if length == 0:
        return

    # Unit direction vector along the line.
    dx /= length
    dy /= length

    current_pos = 0
    while current_pos < length:
        start = current_pos
        # The last dash is clipped so it never overshoots p2.
        end = min(current_pos + dash_length, length)
        draw.line(
            (p1[0] + start * dx, p1[1] + start * dy,
             p1[0] + end * dx, p1[1] + end * dy),
            fill=fill,
            width=width,
        )
        current_pos += step


def draw_dashed_rectangle(draw, box, fill, width, dash_length, gap_length):
    """Draw a dashed rectangle for ``box`` given as ``(x0, y0, x1, y1)``.

    The four edges are drawn in the order top, right, bottom, left, each as
    a separate dashed line.
    """
    x0, y0, x1, y1 = box
    corners = [(x0, y0), (x1, y0), (x1, y1), (x0, y1)]
    for index, start in enumerate(corners):
        end = corners[(index + 1) % 4]
        draw_dashed_line(draw, start, end, fill, width, dash_length, gap_length)


def _decode_image_data(image_data):
    """Decode a base64 image string, with or without a ``...;base64,`` prefix."""
    _, separator, payload = image_data.partition(",")
    if not separator:
        # No data-URL header present: the whole string is the payload.
        payload = image_data
    return Image.open(io.BytesIO(base64.b64decode(payload))).convert("RGB")


def _load_cell_image(info, base_folder):
    """Return the RGB image described by ``info``, or ``None`` if there is none."""
    if info.get('image_data'):
        return _decode_image_data(info['image_data'])

    filename = info.get('processed_filename') or info.get('filename')
    if filename:
        img_path = os.path.join(base_folder, filename)
        if os.path.exists(img_path):
            # convert() returns an independent copy, so the source file can
            # be closed as soon as the conversion is done.
            with Image.open(img_path) as source:
                return source.convert("RGB")
    return None


def _fit_within(src_w, src_h, max_w, max_h):
    """Scale ``src_w x src_h`` to fit ``max_w x max_h``, keeping aspect ratio.

    Returns ``(new_w, new_h)`` with both values at least 1, or ``None`` when
    the source or the target box has no usable area.
    """
    if src_w < 1 or src_h < 1 or max_w < 1 or max_h < 1:
        return None

    img_ratio = src_w / src_h
    if img_ratio > max_w / max_h:
        new_w = int(max_w)
        new_h = int(new_w / img_ratio)
    else:
        new_h = int(max_h)
        new_w = int(new_h * img_ratio)
    # Extreme aspect ratios can truncate one side to 0, which resize() rejects.
    return max(1, new_w), max(1, new_h)


def create_a4_pdf_from_images(image_info, base_folder, output_filename, images_per_page, output_folder=None, orientation='portrait', grid_rows=None, grid_cols=None, practice_mode='none', return_bytes=False, font_size_scale=1.0):
    """Lay question images out on A4-sized pages and write them as one PDF.

    Args:
        image_info: Sequence of dicts, one per question. Each dict supplies
            the image either as ``image_data`` (base64 string, optionally
            with a ``...,`` header) or as ``processed_filename`` /
            ``filename`` relative to ``base_folder``, plus the caption
            fields ``question_number``, ``status``, ``marked_solution`` and
            ``actual_solution``.
        base_folder: Folder that file-based images are resolved against.
        output_filename: File name of the PDF inside ``output_folder``.
        images_per_page: Number of entries placed on each page.
        output_folder: Destination folder; created if it does not exist.
        orientation: ``'landscape'`` swaps page width and height; any other
            value gives portrait.
        grid_rows, grid_cols: Explicit grid. When either is falsy, a
            near-square grid is derived from the number of entries on the page.
        practice_mode: ``'none'`` draws a dashed cut box around each image.
            Any other value limits the cell to half the page width and omits
            the box; ``'portrait_2_spacious'`` additionally stacks entries in
            two half-page sections.
        return_bytes: When true, return the PDF as ``bytes`` instead of
            writing a file.
        font_size_scale: Multiplier applied to both caption font sizes.

    Returns:
        The PDF ``bytes`` when ``return_bytes`` is true; ``True`` when the
        file was written; ``False`` when there was nothing to render or no
        destination was given.
    """
    if not image_info:
        return False

    A4_WIDTH_PX, A4_HEIGHT_PX = 4960, 7016

    font_large = get_or_download_font(font_size=int(60 * font_size_scale))
    font_small = get_or_download_font(font_size=int(45 * font_size_scale))

    if orientation == 'landscape':
        page_width, page_height = A4_HEIGHT_PX, A4_WIDTH_PX
    else:
        page_width, page_height = A4_WIDTH_PX, A4_HEIGHT_PX

    is_practice_mode = practice_mode != 'none'
    is_spacious = practice_mode == 'portrait_2_spacious'
    text_padding = 20

    pages = []
    info_chunks = [image_info[i:i + images_per_page] for i in range(0, len(image_info), images_per_page)]

    for chunk in info_chunks:
        page = Image.new('RGB', (page_width, page_height), 'white')
        draw = ImageDraw.Draw(page)

        if grid_rows and grid_cols:
            rows, cols = grid_rows, grid_cols
        else:
            # Near-square grid that is just large enough for this chunk.
            cols = max(1, int(math.ceil(math.sqrt(len(chunk)))))
            rows = max(1, int(math.ceil(len(chunk) / cols)))

        # 200 px margin on every side of the page.
        cell_width = (page_width - 400) // cols
        cell_height = (page_height - 400) // rows
        if is_practice_mode:
            cell_width = (page_width - 400) // 2  # Use half the page for the question

        for i, info in enumerate(chunk):
            col = i % cols
            row = i // cols

            if is_spacious:
                section_height = page_height // 2
                cell_x = 200
                cell_y = 200 + (i % 2) * section_height
                cell_height = section_height - 200
            else:
                cell_x = 200 + col * cell_width
                cell_y = 200 + row * cell_height

            try:
                img = _load_cell_image(info, base_folder)

                # Practice modes other than "spacious" pin content to the
                # left margin; every other layout indents it inside the cell.
                if is_practice_mode and not is_spacious:
                    content_x = 200
                else:
                    content_x = cell_x + 20

                # 1. Measure the two caption lines.
                q_num_text = f"Q: {info['question_number']}"
                info_text = f"Status: {info['status']} | Marked: {info['marked_solution']} | Correct: {info['actual_solution']}"

                q_num_bbox = draw.textbbox((0, 0), q_num_text, font=font_large)
                info_text_bbox = draw.textbbox((0, 0), info_text, font=font_small)
                q_num_height = q_num_bbox[3] - q_num_bbox[1]
                info_text_height = info_text_bbox[3] - info_text_bbox[1]
                total_text_height = q_num_height + info_text_height + text_padding

                # 2. Draw the caption.
                text_y_start = cell_y + 20
                draw.text((content_x, text_y_start), q_num_text, fill="black", font=font_large)
                draw.text((content_x, text_y_start + q_num_height + text_padding), info_text, fill="black", font=font_small)

                if img is None:
                    continue

                # 3. Fit the image into the space left below the caption.
                image_y_start = text_y_start + total_text_height + 20
                if is_spacious:
                    target_w = (page_width // 2) - 250
                else:
                    target_w = cell_width - 40
                target_h = cell_height - (total_text_height + 40)

                fitted = _fit_within(img.width, img.height, target_w, target_h)
                if fitted is None:
                    print(f"Skipping image for PDF: no room left in cell ({target_w}x{target_h})")
                    continue
                new_w, new_h = fitted

                # For spacious mode, scale up if smaller than a certain area
                if is_spacious:
                    min_area = (page_width * page_height) / 12
                    if new_w * new_h < min_area:
                        scale_factor = math.sqrt(min_area / (new_w * new_h))
                        scaled_w = int(new_w * scale_factor)
                        scaled_h = int(new_h * scale_factor)
                        if scaled_w <= target_w and scaled_h <= target_h:
                            new_w, new_h = scaled_w, scaled_h

                img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
                paste_position = (content_x, image_y_start)
                page.paste(img, paste_position)

                # Draw a dashed bounding box for cutting only if not in practice mode
                if not is_practice_mode:
                    x0, y0 = paste_position
                    draw_dashed_rectangle(draw, [x0, y0, x0 + new_w, y0 + new_h], fill="gray", width=3, dash_length=20, gap_length=15)

            except Exception as e:
                # Best effort per cell: one bad entry must not abort the PDF.
                print(f"Error processing image for PDF: {e}")

        pages.append(page)

    if not pages:
        return False

    if return_bytes:
        pdf_bytes = io.BytesIO()
        pages[0].save(pdf_bytes, "PDF", resolution=900.0, save_all=True, append_images=pages[1:])
        return pdf_bytes.getvalue()

    if output_folder and output_filename:
        os.makedirs(output_folder, exist_ok=True)
        output_path = os.path.join(output_folder, output_filename)
        pages[0].save(output_path, "PDF", resolution=900.0, save_all=True, append_images=pages[1:])
        return True

    return False