# Glossarion / QA_Scanner_GUI.py
# Shirochi's picture
# Upload 93 files
# ec038f4 verified
"""
QA Scanner GUI Methods
These methods can be integrated into TranslatorGUI or used standalone
"""
import os
import sys
import re
import json
from PySide6.QtWidgets import (QApplication, QDialog, QWidget, QLabel, QPushButton,
QVBoxLayout, QHBoxLayout, QGridLayout, QFrame,
QCheckBox, QSpinBox, QSlider, QTextEdit, QScrollArea,
QRadioButton, QButtonGroup, QGroupBox, QComboBox,
QFileDialog, QMessageBox, QSizePolicy)
from PySide6.QtCore import Qt, QTimer, Signal, QThread, QObject, QUrl
from PySide6.QtGui import QFont, QPixmap, QIcon, QDesktopServices
import threading
import traceback
# WindowManager and UIHelper removed - not needed in PySide6
# Qt handles window management and UI utilities automatically
# Module-level placeholder for the QA scan entry point.
# NOTE(review): run_qa_scan() checks `scan_html_folder` on the `translator_gui`
# module (via sys.modules) rather than this name — confirm whether anything
# outside this view still assigns or reads this placeholder.
scan_html_folder = None # Will be lazy-loaded from translator_gui
def _normalize_target_language(display_text):
"""Normalize a human-facing target language label to a canonical value.
The QA pipeline expects simple lowercase identifiers like "english",
"korean", or "chinese". This helper maps common dropdown labels to
those canonical forms so detection logic stays stable even if the
UI wording changes (e.g. "Chinese (Simplified)").
"""
if not display_text:
return "english"
s = display_text.strip().lower()
mapping = {
# Core languages
"english": "english",
"en": "english",
"spanish": "spanish",
"es": "spanish",
"french": "french",
"fr": "french",
"german": "german",
"de": "german",
"portuguese": "portuguese",
"pt": "portuguese",
"italian": "italian",
"it": "italian",
"russian": "russian",
"ru": "russian",
"japanese": "japanese",
"ja": "japanese",
"korean": "korean",
"ko": "korean",
# Chinese variants (keep distinct)
"chinese": "chinese",
"chinese (simplified)": "chinese (simplified)",
"chinese (traditional)": "chinese (traditional)",
"zh": "chinese",
"zh-cn": "chinese (simplified)",
"zh-tw": "chinese (traditional)",
# RTL / other scripts
"arabic": "arabic",
"ar": "arabic",
"hebrew": "hebrew",
"he": "hebrew",
"thai": "thai",
"th": "thai",
}
if s in mapping:
return mapping[s]
# Fallback: use the first word (e.g. "english (us)" → "english")
first = s.split()[0]
return mapping.get(first, first)
def _normalize_source_language(display_text):
"""
Normalize source language without collapsing Chinese variants.
Returns lowercase labels that align with word_count_multipliers keys.
"""
if not display_text:
return 'auto'
s = display_text.strip().lower()
if s == 'auto':
return 'auto'
# Keep distinct variants for Chinese
if 'chinese' in s:
if 'traditional' in s:
return 'chinese (traditional)'
if 'simplified' in s:
return 'chinese (simplified)'
return 'chinese'
return s
def check_epub_folder_match(epub_name, folder_name, custom_suffixes=''):
    """Check whether an EPUB name and a folder name likely refer to the same content.

    Uses strict matching to avoid false positives between similarly named,
    differently numbered titles.

    Args:
        epub_name: File name of the EPUB (extension allowed).
        folder_name: Name of the candidate output folder.
        custom_suffixes: Optional comma-separated list of extra output
            suffixes to ignore, in addition to the built-in ones.

    Returns:
        True when the names match directly after normalization, differ only
        by a known output suffix, differ only by a trailing version tag
        (e.g. " v2"), or contain identical number sequences and identical
        non-numeric text. False otherwise.
    """
    # Normalize names for comparison
    epub_norm = normalize_name_for_comparison(epub_name)
    folder_norm = normalize_name_for_comparison(folder_name)

    # Direct match
    if epub_norm == folder_norm:
        return True

    # Check if either name carries a common output suffix that should be ignored
    output_suffixes = ['_output', '_translated', '_trans', '_en', '_english', '_done', '_complete', '_final']
    if custom_suffixes:
        output_suffixes.extend(s.strip() for s in custom_suffixes.split(',') if s.strip())

    for raw_suffix in output_suffixes:
        # normalize_name_for_comparison() rewrites every run of '-', '_' and
        # whitespace as a single space, so the normalized names can never end
        # with a literal "_output". Put the suffix through the same separator
        # rewrite; the leading space it gains keeps the match on a word
        # boundary (" en" does not match the tail of "garden").
        suffix = re.sub(r'[-_\s]+', ' ', raw_suffix.lower()).rstrip()
        if not suffix.strip():
            # Separator-only suffix: nothing meaningful to strip.
            continue
        if folder_norm.endswith(suffix) and folder_norm[:-len(suffix)].strip() == epub_norm:
            return True
        if epub_norm.endswith(suffix) and epub_norm[:-len(suffix)].strip() == folder_norm:
            return True

    # Check for exact match with trailing version numbers removed
    version_pattern = r'[\s_-]v\d+$'
    epub_no_version = re.sub(version_pattern, '', epub_norm)
    folder_no_version = re.sub(version_pattern, '', folder_norm)
    if epub_no_version == folder_no_version and (epub_no_version != epub_norm or folder_no_version != folder_norm):
        return True

    # STRICT NUMBER CHECK - all numbers must match exactly, in order.
    # Numbers are taken from the raw names, not the normalized ones.
    epub_numbers = re.findall(r'\d+', epub_name)
    folder_numbers = re.findall(r'\d+', folder_name)
    if epub_numbers != folder_numbers:
        return False

    # Numbers agree, so compare what is left once the digits are removed.
    epub_text_only = re.sub(r'\d+', '', epub_norm).strip()
    folder_text_only = re.sub(r'\d+', '', folder_norm).strip()
    if epub_numbers and folder_numbers:
        return epub_text_only == folder_text_only

    # Neither name contains a number and no earlier rule matched.
    return False
def normalize_name_for_comparison(name):
    """Return a lowercase, whitespace-collapsed form of *name* for comparison.

    Applied in order: drop a trailing .epub/.txt/.htm/.html extension, turn
    runs of '-', '_' and whitespace into one space, remove [...] and (...)
    groups that contain no digit, replace remaining punctuation with spaces,
    and collapse whitespace. Digits stay where they were.
    """
    # (pattern, replacement) pairs; the order matters.
    substitutions = (
        (r'\.(epub|txt|html?)$', ''),
        (r'[-_\s]+', ' '),
        (r'\[(?![^\]]*\d)[^\]]*\]', ''),
        (r'\((?![^)]*\d)[^)]*\)', ''),
        (r'[^\w\s\-]', ' '),
    )
    cleaned = name.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    collapsed = ' '.join(cleaned.split())
    return collapsed.strip()
class QAScannerMixin:
"""Mixin class containing QA Scanner methods for TranslatorGUI"""
def _create_styled_checkbox(self, text):
    """Return a QCheckBox labelled *text* with white text.

    The stylesheet sets the indicator's image properties to none in both the
    normal and the checked state.
    """
    from PySide6.QtWidgets import QCheckBox

    indicator_style = """
QCheckBox {
color: white;
}
QCheckBox::indicator {
background-image: none;
image: none;
content: none;
text: none;
}
QCheckBox::indicator:checked {
background-image: none;
image: none;
content: none;
text: none;
}
"""
    styled_box = QCheckBox(text)
    styled_box.setStyleSheet(indicator_style)
    return styled_box
def _create_styled_radio_button(self, text):
    """Return a QRadioButton labelled *text* with the shared blue-ring styling.

    The stylesheet gives white 10pt Arial text and a round, blue-bordered
    indicator that fills in when checked and darkens on hover.
    """
    from PySide6.QtWidgets import QRadioButton

    ring_style = """
QRadioButton {
color: white;
font-family: Arial;
font-size: 10pt;
}
QRadioButton::indicator {
width: 13px;
height: 13px;
border: 2px solid #0d6efd;
border-radius: 7px;
background-color: transparent;
}
QRadioButton::indicator:checked {
background-color: #0d6efd;
border: 2px solid #0d6efd;
}
QRadioButton::indicator:hover {
border: 2px solid #0b5ed7;
}
QRadioButton::indicator:checked:hover {
background-color: #0b5ed7;
border: 2px solid #0b5ed7;
}
"""
    styled_radio = QRadioButton(text)
    styled_radio.setStyleSheet(ring_style)
    return styled_radio
def open_latest_qa_report(self):
    """Open the most recently modified QA report (validation_results.html).

    The search root is the OUTPUT_DIRECTORY environment variable or, failing
    that, ``self.config['output_directory']`` when it names an existing
    directory; otherwise the current working directory. The root is walked
    recursively and the matching file (case-insensitive name) with the
    newest modification time is opened through QDesktopServices. If the walk
    finds nothing, ``self.last_qa_report_path`` is used when it still exists.

    Shows an information box when no report can be found and a warning box
    when opening fails. Nothing is returned and no exception propagates from
    the search/open step.
    """
    try:
        override_dir = os.environ.get('OUTPUT_DIRECTORY') or self.config.get('output_directory')
        if override_dir and os.path.isdir(override_dir):
            search_root = os.path.normpath(override_dir)
        else:
            search_root = os.getcwd()

        newest = None
        newest_mtime = -1
        for root, _, files in os.walk(search_root):
            for fname in files:
                if fname.lower() != "validation_results.html":
                    continue
                candidate = os.path.join(root, fname)
                try:
                    mtime = os.path.getmtime(candidate)
                except OSError:
                    # File vanished or is unreadable: keep it as a
                    # lowest-priority candidate rather than aborting.
                    mtime = 0
                if mtime > newest_mtime:
                    newest_mtime = mtime
                    newest = candidate

        # Fallback to cached path only if nothing found in current search
        cached = getattr(self, 'last_qa_report_path', None)
        if not newest and cached and os.path.exists(cached):
            newest = cached

        if not newest or not os.path.exists(newest):
            QMessageBox.information(self, "QA Report", "QA report does not exist. Run a QA scan first.")
            return

        self.last_qa_report_path = newest
        # openUrl() reports failure through its boolean result, not by
        # raising. Route a False result to the failure handler below instead
        # of logging a report as opened when nothing was launched.
        if not QDesktopServices.openUrl(QUrl.fromLocalFile(os.path.abspath(newest))):
            raise RuntimeError(f"the system could not open {newest}")
        if hasattr(self, 'append_log'):
            self.append_log(f"📄 Opened QA report: {os.path.basename(newest)}")
    except Exception as e:
        # Logging is best-effort; the warning dialog must still be shown.
        try:
            if hasattr(self, 'append_log'):
                self.append_log(f"❌ Failed to open QA report: {e}")
        except Exception:
            pass
        QMessageBox.warning(self, "QA Report", f"Failed to open QA report:\n{e}")
def run_qa_scan(self, mode_override=None, non_interactive=False, preselected_files=None):
"""Run QA scan with mode selection and settings"""
# Removed loading screen - initialize directly for smoother experience
try:
# Start a brief auto-scroll delay so first log lines are readable
try:
import time as _time
if hasattr(self, '_start_autoscroll_delay'):
self._start_autoscroll_delay(100)
elif hasattr(self, '_autoscroll_delay_until'):
self._autoscroll_delay_until = _time.time() + 0.6
except Exception:
pass
if not self._lazy_load_modules():
self.append_log("❌ Failed to load QA scanner modules")
return
# Check for scan_html_folder in the global scope from translator_gui
import sys
translator_module = sys.modules.get('translator_gui')
if translator_module is None or not hasattr(translator_module, 'scan_html_folder') or translator_module.scan_html_folder is None:
self.append_log("❌ QA scanner module is not available")
QMessageBox.critical(None, "Module Error", "QA scanner module is not available.")
return
if hasattr(self, 'qa_thread') and self.qa_thread and self.qa_thread.is_alive():
self.stop_requested = True
self.append_log("⛔ QA scan stop requested.")
return
self.append_log("✅ QA scanner initialized successfully")
except Exception as e:
self.append_log(f"❌ Error initializing QA scanner: {e}")
return
# Load QA scanner settings from config
qa_settings = self.config.get('qa_scanner_settings', {
'foreign_char_threshold': 10,
'excluded_characters': '',
'target_language': 'english',
'source_language': 'auto',
'check_encoding_issues': False,
'check_repetition': True,
'check_translation_artifacts': False,
'check_ai_artifacts': False,
'check_punctuation_mismatch': False,
'punctuation_loss_threshold': 49,
'flag_excess_punctuation': False,
'excess_punctuation_threshold': 49,
'check_glossary_leakage': True,
'check_missing_images': True,
'min_file_length': 0,
'report_format': 'detailed',
'auto_save_report': True,
'check_missing_html_tag': True,
'check_missing_header_tags': True,
'check_invalid_nesting': False,
'check_word_count_ratio': True,
'check_multiple_headers': True,
'warn_name_mismatch': True,
'quick_scan_sample_size': 1000,
'cache_enabled': True,
'cache_auto_size': False,
'cache_show_stats': False,
'cache_normalize_text': 10000,
'cache_similarity_ratio': 20000,
'cache_content_hashes': 5000,
'cache_semantic_fingerprint': 2000,
'cache_structural_signature': 2000,
'cache_translation_artifacts': 1000,
'word_count_multipliers': {
# Character-based multipliers (source chars → target chars, no spaces)
# CJK languages expand significantly when translated to alphabetic languages
'english': 1.0,
'spanish': 1.10,
'french': 1.10,
'german': 1.05,
'italian': 1.05,
'portuguese': 1.10,
'russian': 1.15,
'arabic': 1.15,
'hindi': 1.10,
'turkish': 1.05,
'chinese': 2.50,
'chinese (simplified)': 2.50,
'chinese (traditional)': 2.50,
'japanese': 2.20,
'korean': 2.30,
'hebrew': 1.05,
'thai': 1.10
}
})
# Ensure multipliers include all defaults
wordcount_defaults = qa_settings.get('word_count_multipliers', {})
if not wordcount_defaults or not isinstance(wordcount_defaults, dict):
wordcount_defaults = {}
for _k, _v in {
# Character-based multipliers (source chars → target chars, no spaces)
'english': 1.0, 'spanish': 1.10, 'french': 1.10, 'german': 1.05, 'italian': 1.05,
'portuguese': 1.10, 'russian': 1.15, 'arabic': 1.15, 'hindi': 1.10, 'turkish': 1.05,
'chinese': 2.50, 'chinese (simplified)': 2.50, 'chinese (traditional)': 2.50,
'japanese': 2.20, 'korean': 2.30, 'hebrew': 1.05, 'thai': 1.10,
'other': 1.0
}.items():
wordcount_defaults.setdefault(_k, _v)
qa_settings['word_count_multipliers'] = wordcount_defaults
# Keep QA target language aligned with the main target language.
# This ensures the scanner respects the same language the user
# selected for translation.
try:
main_lang = self.config.get('output_language') or os.getenv('OUTPUT_LANGUAGE', '')
if main_lang:
qa_settings['target_language'] = _normalize_target_language(main_lang)
except Exception:
pass
# Debug: Print current settings
print(f"[DEBUG] QA Settings: {qa_settings}")
print(f"[DEBUG] Target language: {qa_settings.get('target_language', 'NOT SET')}")
print(f"[DEBUG] Word count check enabled: {qa_settings.get('check_word_count_ratio', False)}")
# Optionally skip mode dialog if a mode override was provided (e.g., scanning phase)
selected_mode_value = mode_override if mode_override else None
if selected_mode_value is None:
# Show mode selection dialog with settings - calculate proportional sizing (halved)
screen = QApplication.primaryScreen().geometry()
screen_width = screen.width()
screen_height = screen.height()
dialog_width = int(screen_width * 0.51) # 50% of screen width
dialog_height = int(screen_height * 0.43) # 45% of screen height
mode_dialog = QDialog(self)
# Apply global stylesheet for consistent appearance IMMEDIATELY to prevent white flash
mode_dialog.setStyleSheet("""
QDialog {
background: qlineargradient(x1:0, y1:0, x2:0, y2:1,
stop:0 #1a1a2e, stop:1 #16213e);
}
QPushButton {
border: 1px solid #4a5568;
border-radius: 4px;
padding: 8px 16px;
background-color: #2d3748;
color: white;
font-weight: bold;
}
QPushButton:hover {
background-color: #4a5568;
border-color: #718096;
}
QPushButton:pressed {
background-color: #1a202c;
}
""")
mode_dialog.setWindowTitle("Select QA Scanner Mode")
mode_dialog.resize(dialog_width, dialog_height)
# Non-modal but stays on top
mode_dialog.setModal(False)
mode_dialog.setWindowFlags(Qt.Window | Qt.WindowStaysOnTopHint)
# Set window icon
try:
ico_path = os.path.join(self.base_dir, 'Halgakos.ico')
if os.path.isfile(ico_path):
mode_dialog.setWindowIcon(QIcon(ico_path))
except Exception:
pass
if selected_mode_value is None:
# Set minimum size to prevent dialog from being too small (using ratios)
# 35% width, 35% height for better content fit
min_width = int(screen_width * 0.45)
min_height = int(screen_height * 0.35)
mode_dialog.setMinimumSize(min_width, min_height)
# Variables
# selected_mode_value already set above
# Main container with constrained expansion
main_layout = QVBoxLayout(mode_dialog)
main_layout.setContentsMargins(10, 10, 10, 10)
# Content widget with padding
content_widget = QWidget()
content_layout = QVBoxLayout(content_widget)
content_layout.setContentsMargins(15, 10, 15, 10)
main_layout.addWidget(content_widget)
# Pre-create Quick Scan sample size spinbox so click handlers can capture it
quick_sample_spinbox = QSpinBox()
quick_sample_spinbox.setMinimum(-1) # -1 = use full text (no downsampling)
quick_sample_spinbox.setMaximum(20000)
quick_sample_spinbox.setSingleStep(500)
qs_initial = qa_settings.get('quick_scan_sample_size', 1000)
try:
qs_initial = int(qs_initial)
except Exception:
qs_initial = 1000
quick_sample_spinbox.setValue(qs_initial)
quick_sample_spinbox.setMinimumWidth(110)
quick_sample_spinbox.wheelEvent = lambda event: event.ignore()
# Auto-save whenever the value changes or editing finishes
def _save_quick_sample(val):
try:
val = int(val)
qa_settings['quick_scan_sample_size'] = val
if hasattr(self, 'config'):
self.config.setdefault('qa_scanner_settings', {})
self.config['qa_scanner_settings']['quick_scan_sample_size'] = val
if hasattr(self, 'save_config'):
self.save_config(show_message=False)
except Exception:
pass
try:
quick_sample_spinbox.valueChanged.disconnect()
except Exception:
pass
quick_sample_spinbox.valueChanged.connect(_save_quick_sample)
try:
quick_sample_spinbox.editingFinished.disconnect()
except Exception:
pass
quick_sample_spinbox.editingFinished.connect(lambda: _save_quick_sample(quick_sample_spinbox.value()))
# Persist current value immediately to ensure config key exists
_save_quick_sample(qs_initial)
# Title with subtitle
title_label = QLabel("Select Detection Mode")
title_label.setFont(QFont("Arial", 20, QFont.Bold))
title_label.setStyleSheet("color: #f0f0f0;")
title_label.setAlignment(Qt.AlignCenter)
content_layout.addWidget(title_label)
subtitle_label = QLabel("Choose how sensitive the duplicate detection should be")
subtitle_label.setFont(QFont("Arial", 11))
subtitle_label.setStyleSheet("color: #d0d0d0;")
subtitle_label.setAlignment(Qt.AlignCenter)
content_layout.addWidget(subtitle_label)
content_layout.addSpacing(8)
# Mode cards container
modes_widget = QWidget()
modes_layout = QGridLayout(modes_widget)
modes_layout.setSpacing(8)
content_layout.addWidget(modes_widget)
mode_data = [
{
"value": "ai-hunter",
"emoji": "🤖",
"title": "AI HUNTER",
"subtitle": "30% threshold",
"features": [
"✓ Catches AI retranslations",
"✓ Different translation styles",
"⚠ MANY false positives",
"✓ Same chapter, different words",
"✓ Detects paraphrasing",
"✓ Ultimate duplicate finder"
],
"bg_color": "#2a1a3e", # Dark purple
"hover_color": "#6a4c93", # Medium purple
"border_color": "#8b5cf6",
"accent_color": "#a78bfa",
"recommendation": "⚡ Best for finding ALL similar content"
},
{
"value": "aggressive",
"emoji": "🔥",
"title": "AGGRESSIVE",
"subtitle": "75% threshold",
"features": [
"✓ Catches most duplicates",
"✓ Good for similar chapters",
"⚠ Some false positives",
"✓ Finds edited duplicates",
"✓ Moderate detection",
"✓ Balanced approach"
],
"bg_color": "#3a1f1f", # Dark red
"hover_color": "#8b3a3a", # Medium red
"border_color": "#dc2626",
"accent_color": "#ef4444",
"recommendation": None
},
{
"value": "quick-scan",
"emoji": "⚡",
"title": "QUICK SCAN",
"subtitle": "85% threshold, Speed optimized",
"features": [
"✓ 3-5x faster scanning",
"✓ Checks consecutive chapters only",
"✓ Simplified analysis",
"✓ Skips AI Hunter",
"✓ Good for large libraries",
"✓ Minimal resource usage"
],
"bg_color": "#1f2937", # Dark gray
"hover_color": "#374151", # Medium gray
"border_color": "#059669",
"accent_color": "#10b981",
"recommendation": "✅ Recommended for average use"
},
{
"value": "custom",
"emoji": "⚙️",
"title": "CUSTOM",
"subtitle": "Configurable",
"features": [
"✓ Fully customizable",
"✓ Set your own thresholds",
"✓ Advanced controls",
"✓ Fine-tune detection",
"✓ Expert mode",
"✓ Maximum flexibility"
],
"bg_color": "#1e3a5f", # Dark blue
"hover_color": "#2c5aa0", # Medium blue
"border_color": "#3b82f6",
"accent_color": "#60a5fa",
"recommendation": None
}
]
# Restore original single-row layout (four cards across)
if selected_mode_value is None:
# Scale down the card contents on small screens while keeping the same 4-card row.
try:
ui_scale = min(1.0, max(0.75, min(screen_width / 1600.0, screen_height / 900.0)))
except Exception:
ui_scale = 1.0
emoji_px = max(28, int(38 * ui_scale))
title_pt = max(12, int(16 * ui_scale))
subtitle_pt = max(8, int(10 * ui_scale))
feature_pt = max(7, int(9 * ui_scale))
icon_logical = max(40, int(56 * ui_scale))
icon_h = max(44, int(60 * ui_scale))
# Make each column share space evenly
for col in range(len(mode_data)):
modes_layout.setColumnStretch(col, 1)
for idx, mi in enumerate(mode_data):
# Main card frame with initial background and border
card = QFrame()
card.setFrameShape(QFrame.StyledPanel)
card.setStyleSheet(f"""
QFrame {{
background-color: {mi["bg_color"]};
border: 2px solid {mi["border_color"]};
border-radius: 5px;
}}
QFrame:hover {{
background-color: {mi["hover_color"]};
}}
""")
card.setCursor(Qt.PointingHandCursor)
modes_layout.addWidget(card, 0, idx)
# Content layout
card_layout = QVBoxLayout(card)
m = max(6, int(10 * ui_scale))
mb = max(3, int(5 * ui_scale))
card_layout.setContentsMargins(m, m, m, mb)
# Icon/Emoji container with fixed height for alignment
icon_container = QWidget()
icon_container.setFixedHeight(icon_h)
icon_container.setStyleSheet("background-color: transparent;")
icon_container_layout = QVBoxLayout(icon_container)
icon_container_layout.setContentsMargins(0, 0, 0, 0)
icon_container_layout.setAlignment(Qt.AlignCenter)
# Icon/Emoji - use Halgakos.ico for AI Hunter, emoji for others (HiDPI, multi-path, sharp)
if mi["value"] == "ai-hunter":
icon_label = None
try:
import sys
candidates = [
os.path.join(getattr(self, "base_dir", os.getcwd()), "Halgakos.ico"),
os.path.join(os.path.dirname(os.path.abspath(__file__)), "Halgakos.ico"),
os.path.join(os.getcwd(), "Halgakos.ico"),
os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "Halgakos.ico"),
os.path.join(getattr(sys, "_MEIPASS", os.getcwd()), "Halgakos.ico"),
]
ico_path = next((p for p in candidates if os.path.isfile(p)), None)
if ico_path:
icon = QIcon(ico_path)
icon_label = QLabel()
icon_label.setStyleSheet("background-color: transparent; border: none;")
# Use device pixel ratio to avoid blur
try:
dpr = self.devicePixelRatioF()
except Exception:
dpr = 1.0
target_logical = icon_logical # requested logical size of label
dev_px = int(target_logical * max(1.0, dpr))
# Prefer largest available size to reduce scaling blur
avail = icon.availableSizes()
if avail:
best = max(avail, key=lambda s: s.width() * s.height())
pm = icon.pixmap(best * int(max(1.0, dpr)))
else:
pm = icon.pixmap(QSize(dev_px, dev_px))
if pm.isNull():
pm = QPixmap(ico_path)
if not pm.isNull():
try:
pm.setDevicePixelRatio(dpr)
except Exception:
pass
# Fit into logical target size
fitted = pm.scaled(
int(target_logical * dpr),
int(target_logical * dpr),
Qt.KeepAspectRatio,
Qt.SmoothTransformation,
)
try:
fitted.setDevicePixelRatio(dpr)
except Exception:
pass
icon_label.setPixmap(fitted)
icon_label.setFixedSize(target_logical, target_logical)
icon_label.setAlignment(Qt.AlignCenter)
icon_container_layout.addWidget(icon_label)
else:
icon_label = None
except Exception:
icon_label = None
if icon_label is None:
emoji_label = QLabel(mi["emoji"])
emoji_label.setFont(QFont("Arial", emoji_px))
emoji_label.setAlignment(Qt.AlignCenter)
emoji_label.setStyleSheet("background-color: transparent; color: white; border: none;")
icon_container_layout.addWidget(emoji_label)
else:
# Use emoji for other cards
emoji_label = QLabel(mi["emoji"])
emoji_label.setFont(QFont("Arial", emoji_px))
emoji_label.setAlignment(Qt.AlignCenter)
emoji_label.setStyleSheet("background-color: transparent; color: white; border: none;")
icon_container_layout.addWidget(emoji_label)
card_layout.addWidget(icon_container)
# Title
title_label = QLabel(mi["title"])
title_label.setFont(QFont("Arial", title_pt, QFont.Bold))
title_label.setWordWrap(True)
title_label.setAlignment(Qt.AlignCenter)
title_label.setStyleSheet(f"background-color: transparent; color: white; border: none;")
card_layout.addWidget(title_label)
# Subtitle
subtitle_label = QLabel(mi["subtitle"])
subtitle_label.setFont(QFont("Arial", subtitle_pt))
subtitle_label.setWordWrap(True)
subtitle_label.setAlignment(Qt.AlignCenter)
subtitle_label.setStyleSheet(f"background-color: transparent; color: {mi['accent_color']}; border: none;")
card_layout.addWidget(subtitle_label)
card_layout.addSpacing(6)
# Features
for feature in mi["features"]:
feature_label = QLabel(feature)
feature_label.setFont(QFont("Arial", feature_pt))
feature_label.setWordWrap(True)
feature_label.setStyleSheet(f"background-color: transparent; color: #e0e0e0; border: none;")
card_layout.addWidget(feature_label)
# Recommendation badge if present
if mi["recommendation"]:
card_layout.addSpacing(6)
rec_label = QLabel(mi["recommendation"])
rec_label.setFont(QFont("Arial", feature_pt, QFont.Bold))
rec_label.setWordWrap(True)
rec_label.setStyleSheet(f"""
background-color: {mi['accent_color']};
color: white;
padding: 3px 6px;
border-radius: 3px;
""")
rec_label.setAlignment(Qt.AlignCenter)
card_layout.addWidget(rec_label)
card_layout.addStretch()
# Click handler
def make_click_handler(mode_value):
def handler():
nonlocal selected_mode_value
# Persist quick scan sample size before closing
qa_settings['quick_scan_sample_size'] = quick_sample_spinbox.value()
try:
if hasattr(self, 'config'):
if 'qa_scanner_settings' not in self.config:
self.config['qa_scanner_settings'] = {}
self.config['qa_scanner_settings'].update(qa_settings)
# Save quietly to persist between runs
if hasattr(self, 'save_config'):
self.save_config(show_message=False)
except Exception:
pass
selected_mode_value = mode_value
mode_dialog.accept()
return handler
# Make card clickable with mouse press event
card.mousePressEvent = lambda event, handler=make_click_handler(mi["value"]): handler()
if selected_mode_value is None:
# Quick Scan sample size control
qs_row = QWidget()
qs_layout = QHBoxLayout(qs_row)
qs_layout.setContentsMargins(4, 8, 4, 4)
qs_label = QLabel("Quick Scan duplicate check sample size (characters):")
qs_label.setFont(QFont("Arial", 10))
qs_label.setStyleSheet("color: #f0f0f0;")
qs_layout.addWidget(qs_label)
qs_layout.addWidget(quick_sample_spinbox)
hint = QLabel("Used only for duplicate detection; -1 = all text, 0 = disable check")
hint.setStyleSheet("color: #9ca3af;")
hint.setFont(QFont("Arial", 9))
qs_layout.addWidget(hint)
qs_layout.addStretch()
content_layout.addWidget(qs_row)
# Add separator line before buttons
separator = QFrame()
separator.setFrameShape(QFrame.HLine)
separator.setStyleSheet("background-color: #cccccc;")
separator.setFixedHeight(1)
content_layout.addWidget(separator)
content_layout.addSpacing(10)
# Add settings/button layout
button_layout = QHBoxLayout()
button_layout.addStretch()
# Open QA report button (left of auto-search toggle)
open_report_btn = QPushButton("📁 Open QA Report")
open_report_btn.setMinimumWidth(130)
open_report_btn.setStyleSheet("""
QPushButton {
background-color: #17a2b8;
color: white;
border: 1px solid #17a2b8;
padding: 8px 10px;
border-radius: 4px;
font-weight: bold;
}
QPushButton:hover {
background-color: #138496;
border-color: #117a8b;
}
""")
open_report_btn.clicked.connect(lambda: self.open_latest_qa_report())
button_layout.addWidget(open_report_btn)
button_layout.addSpacing(10)
def show_qa_settings():
"""Show QA Scanner settings dialog"""
self.show_qa_scanner_settings(mode_dialog, qa_settings)
# Auto-search checkbox
if not hasattr(self, 'qa_auto_search_output_checkbox'):
self.qa_auto_search_output_checkbox = self._create_styled_checkbox("Auto-search output")
# Define the save handler
def save_auto_search_state(checked):
self.config['qa_auto_search_output'] = checked
self.save_config(show_message=False)
self.qa_auto_search_save_handler = save_auto_search_state
# Always update checkbox state from current config
# Block signals temporarily to prevent triggering save during programmatic update
self.qa_auto_search_output_checkbox.blockSignals(True)
self.qa_auto_search_output_checkbox.setChecked(self.config.get('qa_auto_search_output', True))
self.qa_auto_search_output_checkbox.blockSignals(False)
# Connect or reconnect the signal handler
try:
self.qa_auto_search_output_checkbox.toggled.disconnect()
except:
pass # No handler was connected
self.qa_auto_search_output_checkbox.toggled.connect(self.qa_auto_search_save_handler)
button_layout.addWidget(self.qa_auto_search_output_checkbox)
button_layout.addSpacing(10)
settings_btn = QPushButton("⚙️ Scanner Settings")
settings_btn.setMinimumWidth(140)
settings_btn.setStyleSheet("""
QPushButton {
background-color: #0d6efd;
color: white;
border: 1px solid #0d6efd;
padding: 8px 10px;
border-radius: 4px;
}
QPushButton:hover {
background-color: #0b5ed7;
}
""")
settings_btn.clicked.connect(show_qa_settings)
button_layout.addWidget(settings_btn)
button_layout.addSpacing(10)
cancel_btn = QPushButton("Cancel")
cancel_btn.setMinimumWidth(100)
cancel_btn.setStyleSheet("""
QPushButton {
background-color: #dc3545;
color: white;
border: 1px solid #dc3545;
padding: 8px 10px;
border-radius: 4px;
}
QPushButton:hover {
background-color: #bb2d3b;
}
""")
cancel_btn.clicked.connect(mode_dialog.reject)
button_layout.addWidget(cancel_btn)
button_layout.addStretch()
content_layout.addLayout(button_layout)
# Handle window close (X button)
def on_close():
nonlocal selected_mode_value
selected_mode_value = None
mode_dialog.rejected.connect(on_close)
# Show dialog non-modally and wait for result using local event loop
# This allows interaction with the main window while waiting
mode_dialog.show()
from PySide6.QtCore import QEventLoop
loop = QEventLoop()
mode_dialog.finished.connect(loop.quit)
loop.exec()
result = mode_dialog.result()
# Check if user canceled or selected a mode
if result == QDialog.Rejected or selected_mode_value is None:
self.append_log("⚠️ QA scan canceled.")
return
# End of optional mode dialog
# Show custom settings dialog if custom mode is selected
# BUT skip the dialog if non_interactive=True (e.g., post-translation scan)
if selected_mode_value == "custom" and not non_interactive:
# Create custom settings dialog
custom_dialog = QDialog(self)
# Apply dark stylesheet IMMEDIATELY to prevent white flash
custom_dialog.setStyleSheet("""
QDialog {
background-color: #2d2d2d;
color: white;
}
QGroupBox {
color: white;
border: 1px solid #555;
margin: 10px;
padding-top: 10px;
}
QGroupBox::title {
color: white;
left: 10px;
padding: 0 5px;
}
QLabel {
color: white;
}
QPushButton {
background-color: #404040;
color: white;
border: 1px solid #555;
padding: 5px;
}
QPushButton:hover {
background-color: #505050;
}
""")
custom_dialog.setWindowTitle("Custom Mode Settings")
custom_dialog.setModal(True)
# Use screen ratios: 20% width, 50% height for better content fit
screen = QApplication.primaryScreen().geometry()
custom_width = int(screen.width() * 0.51)
custom_height = int(screen.height() * 0.60)
custom_dialog.resize(custom_width, custom_height)
# Set window icon
try:
ico_path = os.path.join(self.base_dir, 'Halgakos.ico')
if os.path.isfile(ico_path):
custom_dialog.setWindowIcon(QIcon(ico_path))
except Exception:
pass
# Main layout
dialog_layout = QVBoxLayout(custom_dialog)
# Scroll area
scroll = QScrollArea()
scroll.setWidgetResizable(True)
scroll.setHorizontalScrollBarPolicy(Qt.ScrollBarAsNeeded)
scroll.setVerticalScrollBarPolicy(Qt.ScrollBarAsNeeded)
# Scrollable content widget
scroll_widget = QWidget()
scroll_layout = QVBoxLayout(scroll_widget)
scroll.setWidget(scroll_widget)
dialog_layout.addWidget(scroll)
# Variables for custom settings (using native Python values instead of tk vars)
# Load saved settings from config if they exist
saved_custom_settings = self.config.get('qa_scanner_settings', {}).get('custom_mode_settings', {})
# Default values
custom_settings = {
'similarity': 85,
'semantic': 80,
'structural': 90,
'word_overlap': 75,
'minhash_threshold': 80,
'consecutive_chapters': 2,
'check_all_pairs': False,
'sample_size': 3000,
'min_text_length': 500,
'min_duplicate_word_count': 500
}
# Override with saved settings if they exist
if saved_custom_settings:
# Load threshold values (they're stored as decimals, need to convert to percentages)
saved_thresholds = saved_custom_settings.get('thresholds', {})
if saved_thresholds:
custom_settings['similarity'] = int(saved_thresholds.get('similarity', 0.85) * 100)
custom_settings['semantic'] = int(saved_thresholds.get('semantic', 0.80) * 100)
custom_settings['structural'] = int(saved_thresholds.get('structural', 0.90) * 100)
custom_settings['word_overlap'] = int(saved_thresholds.get('word_overlap', 0.75) * 100)
custom_settings['minhash_threshold'] = int(saved_thresholds.get('minhash_threshold', 0.80) * 100)
# Load other settings
custom_settings['consecutive_chapters'] = saved_custom_settings.get('consecutive_chapters', 2)
custom_settings['check_all_pairs'] = saved_custom_settings.get('check_all_pairs', False)
custom_settings['sample_size'] = saved_custom_settings.get('sample_size', 3000)
custom_settings['min_text_length'] = saved_custom_settings.get('min_text_length', 500)
self.append_log("📥 Loaded saved custom mode settings from config")
# Store widget references
custom_widgets = {}
# Title with icons on both sides
title_container = QWidget()
title_layout = QHBoxLayout(title_container)
title_layout.setContentsMargins(0, 0, 0, 0)
# Left icon
left_icon_label = QLabel()
try:
ico_path = os.path.join(self.base_dir, 'Halgakos.ico')
if os.path.isfile(ico_path):
icon = QIcon(ico_path)
pixmap = icon.pixmap(48, 48)
if not pixmap.isNull():
left_icon_label.setPixmap(pixmap)
left_icon_label.setAlignment(Qt.AlignCenter)
left_icon_label.setStyleSheet("background-color: transparent; border: none;")
except Exception:
pass
# Title text
title_label = QLabel("Configure Custom Detection Settings")
title_label.setFont(QFont('Arial', 20, QFont.Bold))
title_label.setAlignment(Qt.AlignCenter)
title_label.setStyleSheet("background-color: transparent; border: none;")
# Right icon
right_icon_label = QLabel()
try:
ico_path = os.path.join(self.base_dir, 'Halgakos.ico')
if os.path.isfile(ico_path):
icon = QIcon(ico_path)
pixmap = icon.pixmap(48, 48)
if not pixmap.isNull():
right_icon_label.setPixmap(pixmap)
right_icon_label.setAlignment(Qt.AlignCenter)
right_icon_label.setStyleSheet("background-color: transparent; border: none;")
except Exception:
pass
# Add to layout with proper spacing
title_layout.addStretch()
title_layout.addWidget(left_icon_label)
title_layout.addSpacing(15)
title_layout.addWidget(title_label)
title_layout.addSpacing(15)
title_layout.addWidget(right_icon_label)
title_layout.addStretch()
scroll_layout.addWidget(title_container)
scroll_layout.addSpacing(20)
# Detection Thresholds Section
threshold_group = QGroupBox("Detection Thresholds (%)")
threshold_group.setFont(QFont('Arial', 12, QFont.Bold))
threshold_layout = QVBoxLayout(threshold_group)
threshold_layout.setContentsMargins(25, 25, 25, 25)
scroll_layout.addWidget(threshold_group)
threshold_descriptions = {
'similarity': ('Text Similarity', 'Character-by-character comparison'),
'semantic': ('Semantic Analysis', 'Meaning and context matching'),
'structural': ('Structural Patterns', 'Document structure similarity'),
'word_overlap': ('Word Overlap', 'Common words between texts'),
'minhash_threshold': ('MinHash Similarity', 'Fast approximate matching')
}
# Create percentage labels dictionary to store references
percentage_labels = {}
for setting_key, (label_text, description) in threshold_descriptions.items():
# Container for each threshold
row_widget = QWidget()
row_layout = QHBoxLayout(row_widget)
row_layout.setContentsMargins(0, 8, 0, 8)
# Left side - labels
label_widget = QWidget()
label_layout = QVBoxLayout(label_widget)
label_layout.setContentsMargins(0, 0, 0, 0)
main_label = QLabel(f"{label_text} - {description}:")
main_label.setFont(QFont('Arial', 11))
label_layout.addWidget(main_label)
label_widget.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Preferred)
row_layout.addWidget(label_widget)
# Right side - slider and percentage
slider_widget = QWidget()
slider_layout = QHBoxLayout(slider_widget)
slider_layout.setContentsMargins(20, 0, 0, 0)
# Create slider
slider = QSlider(Qt.Horizontal)
slider.setMinimum(10)
slider.setMaximum(100)
slider.setValue(custom_settings[setting_key])
slider.setMinimumWidth(300)
# Disable mousewheel scrolling on slider
slider.wheelEvent = lambda event: event.ignore()
slider_layout.addWidget(slider)
# Percentage label (shows current value)
percentage_label = QLabel(f"{custom_settings[setting_key]}%")
percentage_label.setFont(QFont('Arial', 12, QFont.Bold))
percentage_label.setMinimumWidth(50)
percentage_label.setAlignment(Qt.AlignRight)
slider_layout.addWidget(percentage_label)
percentage_labels[setting_key] = percentage_label
row_layout.addWidget(slider_widget)
threshold_layout.addWidget(row_widget)
# Store slider widget reference
custom_widgets[setting_key] = slider
# Update percentage label when slider moves
def create_update_function(key, label, settings_dict):
    """Build the value-changed handler for a single threshold slider.

    The returned callable takes the slider's new value, stores it under
    ``settings_dict[key]`` and shows it on ``label`` as ``"<value>%"``.
    Passing ``key`` and ``label`` through this factory binds them per call,
    so each handler keeps its own pair.
    """
    def _apply_slider_value(new_value):
        # Record the value first, then refresh the on-screen percentage.
        settings_dict[key] = new_value
        label.setText("{}%".format(new_value))

    return _apply_slider_value
# Connect slider to update function
update_func = create_update_function(setting_key, percentage_label, custom_settings)
slider.valueChanged.connect(update_func)
scroll_layout.addSpacing(15)
# Processing Options Section
options_group = QGroupBox("Processing Options")
options_group.setFont(QFont('Arial', 12, QFont.Bold))
options_layout = QVBoxLayout(options_group)
options_layout.setContentsMargins(20, 20, 20, 20)
scroll_layout.addWidget(options_group)
# Consecutive chapters option with spinbox
consec_widget = QWidget()
consec_layout = QHBoxLayout(consec_widget)
consec_layout.setContentsMargins(0, 5, 0, 5)
consec_label = QLabel("Consecutive chapters to check:")
consec_label.setFont(QFont('Arial', 11))
consec_layout.addWidget(consec_label)
consec_spinbox = QSpinBox()
consec_spinbox.setMinimum(1)
consec_spinbox.setMaximum(10)
consec_spinbox.setValue(custom_settings['consecutive_chapters'])
consec_spinbox.setMinimumWidth(100)
# Disable mousewheel scrolling
consec_spinbox.wheelEvent = lambda event: event.ignore()
consec_layout.addWidget(consec_spinbox)
consec_layout.addStretch()
options_layout.addWidget(consec_widget)
custom_widgets['consecutive_chapters'] = consec_spinbox
# Sample size option
sample_widget = QWidget()
sample_layout = QHBoxLayout(sample_widget)
sample_layout.setContentsMargins(0, 5, 0, 5)
sample_label = QLabel("Sample size for comparison (characters):")
sample_label.setFont(QFont('Arial', 11))
sample_layout.addWidget(sample_label)
# Sample size spinbox with larger range
# -1 = use all characters (no downsampling)
# 0 = disable duplicate detection
sample_spinbox = QSpinBox()
sample_spinbox.setMinimum(-1)
# QSpinBox requires a maximum; set it extremely high to be effectively "no maximum"
sample_spinbox.setMaximum(2000000000)
sample_spinbox.setSingleStep(500)
sample_spinbox.setValue(custom_settings['sample_size'])
sample_spinbox.setMinimumWidth(100)
sample_spinbox.setToolTip("-1 = use all characters, 0 = disable duplicate detection")
# Disable mousewheel scrolling
sample_spinbox.wheelEvent = lambda event: event.ignore()
sample_layout.addWidget(sample_spinbox)
sample_layout.addStretch()
options_layout.addWidget(sample_widget)
custom_widgets['sample_size'] = sample_spinbox
# Minimum text length option
min_length_widget = QWidget()
min_length_layout = QHBoxLayout(min_length_widget)
min_length_layout.setContentsMargins(0, 5, 0, 5)
min_length_label = QLabel("Minimum text length to process (characters):")
min_length_label.setFont(QFont('Arial', 11))
min_length_layout.addWidget(min_length_label)
# Minimum length spinbox
min_length_spinbox = QSpinBox()
min_length_spinbox.setMinimum(100)
min_length_spinbox.setMaximum(5000)
min_length_spinbox.setSingleStep(100)
min_length_spinbox.setValue(custom_settings['min_text_length'])
min_length_spinbox.setMinimumWidth(100)
# Disable mousewheel scrolling
min_length_spinbox.wheelEvent = lambda event: event.ignore()
min_length_layout.addWidget(min_length_spinbox)
min_length_layout.addStretch()
options_layout.addWidget(min_length_widget)
custom_widgets['min_text_length'] = min_length_spinbox
# Check all file pairs option
check_all_checkbox = self._create_styled_checkbox("Check all file pairs (slower but more thorough)")
check_all_checkbox.setChecked(custom_settings['check_all_pairs'])
options_layout.addWidget(check_all_checkbox)
custom_widgets['check_all_pairs'] = check_all_checkbox
scroll_layout.addStretch()
# Create fixed bottom button section (outside scroll area)
button_widget = QWidget()
button_layout = QHBoxLayout(button_widget)
button_layout.setContentsMargins(20, 15, 20, 15)
# Flag to track if settings were saved
settings_saved = False
def save_custom_settings():
"""Save custom settings and close dialog for scan"""
nonlocal settings_saved
qa_settings['custom_mode_settings'] = {
'thresholds': {
'similarity': custom_widgets['similarity'].value() / 100,
'semantic': custom_widgets['semantic'].value() / 100,
'structural': custom_widgets['structural'].value() / 100,
'word_overlap': custom_widgets['word_overlap'].value() / 100,
'minhash_threshold': custom_widgets['minhash_threshold'].value() / 100
},
'consecutive_chapters': custom_widgets['consecutive_chapters'].value(),
'check_all_pairs': custom_widgets['check_all_pairs'].isChecked(),
'sample_size': custom_widgets['sample_size'].value(),
'min_text_length': custom_widgets['min_text_length'].value()
}
settings_saved = True
self.append_log("✅ Custom detection settings saved")
custom_dialog.accept()
def save_settings_to_config():
    """Write the dialog's current custom-mode values to config.json.

    The values are stored in ``self.config`` and in ``qa_settings``, then the
    whole config is passed through ``encrypt_config`` and dumped to a
    ``config.json`` next to this module. The dialog stays open; the save
    button briefly shows a "Saved!" state. Any exception is logged through
    ``self.append_log`` and its traceback printed; nothing is re-raised.
    """
    try:
        # Snapshot every widget; threshold values are divided by 100.
        threshold_names = (
            'similarity',
            'semantic',
            'structural',
            'word_overlap',
            'minhash_threshold',
        )
        current_custom_settings = {
            'thresholds': {
                name: custom_widgets[name].value() / 100 for name in threshold_names
            },
            'consecutive_chapters': custom_widgets['consecutive_chapters'].value(),
            'check_all_pairs': custom_widgets['check_all_pairs'].isChecked(),
            'sample_size': custom_widgets['sample_size'].value(),
            'min_text_length': custom_widgets['min_text_length'].value(),
        }

        # Make sure the parent section exists before overwriting the custom block.
        if 'qa_scanner_settings' not in self.config:
            self.config['qa_scanner_settings'] = {}
        scanner_section = self.config['qa_scanner_settings']
        scanner_section['custom_mode_settings'] = current_custom_settings

        # Keep the in-session settings dict in step with the config.
        qa_settings['custom_mode_settings'] = current_custom_settings

        import json
        from api_key_encryption import encrypt_config

        # Read the credentials path before encrypting, then put the plain
        # value back into the encrypted copy.
        google_creds_path = self.config.get('google_cloud_credentials')
        encrypted_config = encrypt_config(self.config)
        if google_creds_path:
            encrypted_config['google_cloud_credentials'] = google_creds_path

        module_dir = os.path.dirname(os.path.abspath(__file__))
        config_file = os.path.join(module_dir, 'config.json')
        with open(config_file, 'w', encoding='utf-8') as handle:
            json.dump(encrypted_config, handle, ensure_ascii=False, indent=2)

        saved = current_custom_settings['thresholds']
        self.append_log("✅ Custom settings saved to config.json")
        self.append_log(f"💾 Saved thresholds: similarity={saved['similarity']:.0%}, semantic={saved['semantic']:.0%}, structural={saved['structural']:.0%}")

        # Flash the save button, remembering its previous look for the reset.
        previous_text = save_config_btn.text()
        previous_style = save_config_btn.styleSheet()
        save_config_btn.setText("💾 Saved!")
        save_config_btn.setStyleSheet("""
QPushButton {
background-color: #28a745;
color: white;
border: 1px solid #28a745;
padding: 6px 12px;
font-weight: bold;
}
""")

        def _restore_button():
            save_config_btn.setText(previous_text)
            save_config_btn.setStyleSheet(previous_style)

        # Restore the button after 1.5 seconds.
        QTimer.singleShot(1500, _restore_button)
    except Exception as e:
        self.append_log(f"❌ Error saving settings: {e}")
        import traceback
        traceback.print_exc()
def reset_to_defaults():
    """Ask for confirmation, then put every widget back to its default value."""
    answer = QMessageBox.question(
        custom_dialog,
        "Reset to Defaults",
        "Reset all values to default settings?",
        QMessageBox.Yes | QMessageBox.No,
    )
    if answer != QMessageBox.Yes:
        return

    # (widget key, default) pairs, applied in this order. The boolean entry
    # is the checkbox; every other widget takes a numeric value.
    default_values = (
        ('similarity', 85),
        ('semantic', 80),
        ('structural', 90),
        ('word_overlap', 75),
        ('minhash_threshold', 80),
        ('consecutive_chapters', 2),
        ('check_all_pairs', False),
        ('sample_size', 3000),
        ('min_text_length', 500),
    )
    for widget_key, default in default_values:
        widget = custom_widgets[widget_key]
        if isinstance(default, bool):
            widget.setChecked(default)
        else:
            widget.setValue(default)

    self.append_log("ℹ️ Settings reset to defaults")
# Flag to prevent recursive cancel calls
cancel_in_progress = False
def cancel_settings():
"""Cancel without saving"""
nonlocal settings_saved, cancel_in_progress
# Prevent recursive calls
if cancel_in_progress:
return
cancel_in_progress = True
try:
# Disconnect signal before rejecting to prevent loop
try:
custom_dialog.rejected.disconnect(cancel_settings)
except:
pass
custom_dialog.reject()
finally:
cancel_in_progress = False
# Create buttons for bottom section
cancel_btn = QPushButton("Cancel")
cancel_btn.setMinimumWidth(140)
cancel_btn.setStyleSheet("background-color: #6c757d; color: white; padding: 6px 12px; font-weight: bold;")
cancel_btn.clicked.connect(cancel_settings)
button_layout.addWidget(cancel_btn)
reset_btn = QPushButton("Reset to Default")
reset_btn.setMinimumWidth(140)
reset_btn.setStyleSheet("background-color: #ffc107; color: black; padding: 6px 12px; font-weight: bold;")
reset_btn.clicked.connect(reset_to_defaults)
button_layout.addWidget(reset_btn)
# Save Settings button (saves to config.json)
save_config_btn = QPushButton("💾 Save Settings")
save_config_btn.setMinimumWidth(140)
save_config_btn.setStyleSheet("""
QPushButton {
background-color: #007bff;
color: white;
border: 1px solid #007bff;
padding: 6px 12px;
font-weight: bold;
}
QPushButton:hover {
background-color: #0056b3;
}
""")
save_config_btn.clicked.connect(save_settings_to_config)
button_layout.addWidget(save_config_btn)
start_btn = QPushButton("Start Scan")
start_btn.setMinimumWidth(140)
start_btn.setStyleSheet("background-color: #28a745; color: white; padding: 6px 12px; font-weight: bold;")
start_btn.clicked.connect(save_custom_settings)
button_layout.addWidget(start_btn)
# Add button widget to main layout (not scroll layout)
dialog_layout.addWidget(button_widget)
# Handle window close properly - treat as cancel
# Store the connection so we can disconnect it later if needed
rejected_connection = custom_dialog.rejected.connect(cancel_settings)
# Show dialog and wait for result
result = custom_dialog.exec()
# If user cancelled at this dialog, cancel the whole scan
if not settings_saved:
self.append_log("⚠️ QA scan canceled - no custom settings were saved.")
return
# Check if word count cross-reference is enabled but no source file is selected
check_word_count = qa_settings.get('check_word_count_ratio', False)
epub_files_to_scan = []
primary_epub_path = None
# Determine if text file mode is enabled
text_file_mode = self.config.get('qa_text_file_mode', False)
if hasattr(self, 'qa_text_file_mode_checkbox'):
try:
text_file_mode = bool(self.qa_text_file_mode_checkbox.isChecked())
except Exception:
pass
# ALWAYS populate epub_files_to_scan for auto-search, regardless of word count checking
# First check if current selection actually contains source files (EPUB, TXT, PDF, or MD)
current_epub_files = []
if hasattr(self, 'selected_files') and self.selected_files:
# Check for EPUB, TXT, PDF, and MD files
current_epub_files = [f for f in self.selected_files if f.lower().endswith(('.epub', '.txt', '.pdf', '.md'))]
epub_count = len([f for f in current_epub_files if f.lower().endswith('.epub')])
txt_count = len([f for f in current_epub_files if f.lower().endswith('.txt')])
pdf_count = len([f for f in current_epub_files if f.lower().endswith('.pdf')])
md_count = len([f for f in current_epub_files if f.lower().endswith('.md')])
print(f"[DEBUG] Current selection contains {epub_count} EPUB files, {txt_count} TXT files, {pdf_count} PDF files, and {md_count} MD files")
if current_epub_files:
# Use source files from current selection
epub_files_to_scan = current_epub_files
print(f"[DEBUG] Using {len(epub_files_to_scan)} source files from current selection")
else:
# No source files in current selection - check if we have stored path
primary_epub_path = self.get_current_epub_path()
print(f"[DEBUG] get_current_epub_path returned: {primary_epub_path}")
if primary_epub_path:
epub_files_to_scan = [primary_epub_path]
print(f"[DEBUG] Using stored source file for auto-search")
# Now handle word count specific logic if enabled
if check_word_count:
print("[DEBUG] Word count check is enabled, validating EPUB availability...")
# Check if we have source files for word count analysis
if not epub_files_to_scan:
# No source files available for word count analysis
file_type = "text" if text_file_mode else "EPUB"
msg = QMessageBox(self)
msg.setIcon(QMessageBox.Warning)
msg.setWindowTitle(f"No Source {file_type.upper()} Selected")
msg.setText(f"Word count cross-reference is enabled but no source {file_type} file is selected.")
msg.setInformativeText("Would you like to:\n"
"• YES - Continue scan without word count analysis\n"
f"• NO - Select a {file_type} file now\n"
"• CANCEL - Cancel the scan")
msg.setStandardButtons(QMessageBox.Yes | QMessageBox.No | QMessageBox.Cancel)
result = msg.exec()
if result == QMessageBox.Cancel:
self.append_log("⚠️ QA scan canceled.")
return
elif result == QMessageBox.No: # No - Select source file now
if text_file_mode:
epub_path, _ = QFileDialog.getOpenFileName(
self,
"Select Source Text File",
"",
"Text files (*.txt);;All files (*.*)"
)
else:
epub_path, _ = QFileDialog.getOpenFileName(
self,
"Select Source EPUB File",
"",
"EPUB files (*.epub);;All files (*.*)"
)
if not epub_path:
retry = QMessageBox.question(
self,
"No File Selected",
f"No {file_type} file was selected.\n\n" +
"Do you want to continue the scan without word count analysis?",
QMessageBox.Yes | QMessageBox.No
)
if retry == QMessageBox.No:
self.append_log("⚠️ QA scan canceled.")
return
else:
qa_settings = qa_settings.copy()
qa_settings['check_word_count_ratio'] = False
self.append_log("ℹ️ Proceeding without word count analysis.")
epub_files_to_scan = []
else:
self.selected_epub_path = epub_path
self.config['last_epub_path'] = epub_path
self.save_config(show_message=False)
self.append_log(f"✅ Selected {file_type}: {os.path.basename(epub_path)}")
epub_files_to_scan = [epub_path]
else: # Yes - Continue without word count
qa_settings = qa_settings.copy()
qa_settings['check_word_count_ratio'] = False
self.append_log("ℹ️ Proceeding without word count analysis.")
epub_files_to_scan = []
# Try to auto-detect output folders based on EPUB files
folders_to_scan = []
# Get auto-search preference from checkbox if it exists, otherwise from config
auto_search_enabled = self.config.get('qa_auto_search_output', True)
if hasattr(self, 'qa_auto_search_output_checkbox'):
try:
auto_search_enabled = bool(self.qa_auto_search_output_checkbox.isChecked())
except Exception:
pass
# Debug output for scanning phase removed
if auto_search_enabled and epub_files_to_scan:
# Process each EPUB file to find its corresponding output folder
self.append_log(f"🔍 DEBUG: Auto-search running with {len(epub_files_to_scan)} EPUB files")
for epub_path in epub_files_to_scan:
self.append_log(f"🔍 DEBUG: Processing EPUB: {epub_path}")
try:
epub_base = os.path.splitext(os.path.basename(epub_path))[0]
current_dir = os.getcwd()
script_dir = os.path.dirname(os.path.abspath(__file__))
self.append_log(f"🔍 DEBUG: EPUB base name: '{epub_base}'")
self.append_log(f"🔍 DEBUG: Current dir: {current_dir}")
self.append_log(f"🔍 DEBUG: Script dir: {script_dir}")
# Check the most common locations in order of priority
candidates = [
os.path.join(current_dir, epub_base), # current working directory
os.path.join(script_dir, epub_base), # src directory (where output typically goes)
os.path.join(current_dir, 'src', epub_base), # src subdirectory from current dir
]
# Add output directory override if configured
override_dir = os.environ.get('OUTPUT_DIRECTORY') or self.config.get('output_directory')
if override_dir:
candidates.insert(0, os.path.join(override_dir, epub_base))
self.append_log(f"🔍 DEBUG: Checking override dir: {override_dir}")
folder_found = None
for i, candidate in enumerate(candidates):
exists = os.path.isdir(candidate)
self.append_log(f" [{epub_base}] Checking candidate {i+1}: {candidate} - {'EXISTS' if exists else 'NOT FOUND'}")
if exists:
# Verify the folder actually contains appropriate files (HTML/XHTML or TXT)
try:
files = os.listdir(candidate)
# Determine if text file mode is enabled
text_file_mode = self.config.get('qa_text_file_mode', False)
if hasattr(self, 'qa_text_file_mode_checkbox'):
try:
text_file_mode = bool(self.qa_text_file_mode_checkbox.isChecked())
except Exception:
pass
# Auto-detect text file mode if source file is .txt or .pdf
if epub_path and epub_path.lower().endswith(('.txt', '.pdf')):
text_file_mode = True
if text_file_mode:
# For text mode, check for both .txt AND .html files (PDFs generate .html)
target_files = [f for f in files if f.lower().endswith(('.txt', '.html', '.xhtml', '.htm'))]
file_type = "TXT/HTML"
else:
target_files = [f for f in files if f.lower().endswith(('.html', '.xhtml', '.htm'))]
file_type = "HTML/XHTML"
if target_files:
folder_found = candidate
self.append_log(f"📁 Auto-selected output folder for {epub_base}: {folder_found}")
self.append_log(f" Found {len(target_files)} {file_type} files to scan")
break
else:
self.append_log(f" [{epub_base}] Folder exists but contains no {file_type} files: {candidate}")
except Exception as e:
self.append_log(f" [{epub_base}] Error checking files in {candidate}: {e}")
if folder_found:
folders_to_scan.append(folder_found)
self.append_log(f"🔍 DEBUG: Added to folders_to_scan: {folder_found}")
else:
self.append_log(f" ⚠️ No output folder found for {epub_base}")
except Exception as e:
self.append_log(f" ❌ Error processing {epub_base}: {e}")
self.append_log(f"🔍 DEBUG: Final folders_to_scan: {folders_to_scan}")
# Fallback behavior - if no folders found through auto-detection
if not folders_to_scan:
if auto_search_enabled:
# Auto-search failed, offer manual selection as fallback
self.append_log("⚠️ Auto-search enabled but no matching output folder found")
self.append_log("📁 Falling back to manual folder selection...")
selected_folder = QFileDialog.getExistingDirectory(
self,
"Auto-search failed - Select Output Folder to Scan"
)
if not selected_folder:
self.append_log("⚠️ QA scan canceled - no folder selected.")
return
# Verify the selected folder contains scannable files
try:
files = os.listdir(selected_folder)
# Respect text file mode when validating manual selection
text_file_mode = self.config.get('qa_text_file_mode', False)
if hasattr(self, 'qa_text_file_mode_checkbox'):
try:
text_file_mode = bool(self.qa_text_file_mode_checkbox.isChecked())
except Exception:
pass
if text_file_mode:
# For text mode, check for both .txt AND .html files (PDFs generate .html)
target_files = [f for f in files if f.lower().endswith(('.txt', '.html', '.xhtml', '.htm'))]
file_type = "TXT/HTML"
else:
target_files = [f for f in files if f.lower().endswith(('.html', '.xhtml', '.htm'))]
file_type = "HTML/XHTML"
if target_files:
folders_to_scan.append(selected_folder)
self.append_log(f"✓ Manual selection: {os.path.basename(selected_folder)} ({len(target_files)} {file_type} files)")
else:
self.append_log(f"❌ Selected folder contains no {file_type} files: {selected_folder}")
return
except Exception as e:
self.append_log(f"❌ Error checking selected folder: {e}")
return
if non_interactive:
# Add debug info for scanning phase
if epub_files_to_scan:
self.append_log(f"⚠️ Scanning phase: No matching output folders found for {len(epub_files_to_scan)} EPUB file(s)")
for epub_path in epub_files_to_scan:
epub_base = os.path.splitext(os.path.basename(epub_path))[0]
current_dir = os.getcwd()
expected_folder = os.path.join(current_dir, epub_base)
self.append_log(f" [{epub_base}] Expected: {expected_folder}")
self.append_log(f" [{epub_base}] Exists: {os.path.isdir(expected_folder)}")
# List actual folders in current directory for debugging
try:
current_dir = os.getcwd()
actual_folders = [d for d in os.listdir(current_dir) if os.path.isdir(os.path.join(current_dir, d)) and not d.startswith('.')]
if actual_folders:
self.append_log(f" Available folders: {', '.join(actual_folders[:10])}{'...' if len(actual_folders) > 10 else ''}")
except Exception:
pass
else:
self.append_log("⚠️ Scanning phase: No EPUB files available for folder detection")
self.append_log("⚠️ Skipping scan")
return
# Clean single folder selection - no messageboxes, no harassment
self.append_log("📁 Select folder to scan...")
folders_to_scan = []
# Simply select one folder - clean and simple
# Adjust caption to reflect current file mode
text_file_mode = self.config.get('qa_text_file_mode', False)
if hasattr(self, 'qa_text_file_mode_checkbox'):
try:
text_file_mode = bool(self.qa_text_file_mode_checkbox.isChecked())
except Exception:
pass
caption = "Select Folder with TXT Files" if text_file_mode else "Select Folder with HTML Files"
selected_folder = QFileDialog.getExistingDirectory(
self,
caption
)
if not selected_folder:
self.append_log("⚠️ QA scan canceled - no folder selected.")
return
folders_to_scan.append(selected_folder)
self.append_log(f" ✓ Selected folder: {os.path.basename(selected_folder)}")
self.append_log(f"📁 Single folder scan mode - scanning: {os.path.basename(folders_to_scan[0])}")
mode = selected_mode_value
# Initialize epub_path for use in run_scan() function
# This ensures epub_path is always defined even when manually selecting folders
epub_path = None
if epub_files_to_scan:
epub_path = epub_files_to_scan[0] # Use first EPUB if multiple
self.append_log(f"📚 Using EPUB from scan list: {os.path.basename(epub_path)}")
elif hasattr(self, 'selected_epub_path') and self.selected_epub_path:
epub_path = self.selected_epub_path
self.append_log(f"📚 Using stored EPUB: {os.path.basename(epub_path)}")
elif primary_epub_path:
epub_path = primary_epub_path
self.append_log(f"📚 Using primary EPUB: {os.path.basename(epub_path)}")
else:
self.append_log("ℹ️ No EPUB file configured (word count analysis will be disabled if needed)")
# Initialize global selected_files that applies to single-folder scans
global_selected_files = None
if len(folders_to_scan) == 1 and preselected_files:
global_selected_files = list(preselected_files)
elif len(folders_to_scan) == 1 and (not non_interactive) and (not auto_search_enabled):
# Scan all files in the folder - no messageboxes asking about specific files
# User can set up file preselection if they need specific files
pass
# Log bulk scan start
if len(folders_to_scan) == 1:
self.append_log(f"🔍 Starting QA scan in {mode.upper()} mode for folder: {folders_to_scan[0]}")
else:
self.append_log(f"🔍 Starting bulk QA scan in {mode.upper()} mode for {len(folders_to_scan)} folders")
self.stop_requested = False
# Extract cache configuration from qa_settings
cache_config = {
'enabled': qa_settings.get('cache_enabled', True),
'auto_size': qa_settings.get('cache_auto_size', False),
'show_stats': qa_settings.get('cache_show_stats', False),
'sizes': {}
}
# Get individual cache sizes
for cache_name in ['normalize_text', 'similarity_ratio', 'content_hashes',
'semantic_fingerprint', 'structural_signature', 'translation_artifacts']:
size = qa_settings.get(f'cache_{cache_name}', None)
if size is not None:
# Convert -1 to None for unlimited
cache_config['sizes'][cache_name] = None if size == -1 else size
# Create custom settings that includes cache config
custom_settings = {
'qa_settings': qa_settings,
'cache_config': cache_config,
'log_cache_stats': qa_settings.get('cache_show_stats', False)
}
def run_scan():
"""Scan every folder in folders_to_scan, reporting progress via self.append_log.

Configures the QA cache from qa_settings, then for each folder picks the
source file to pair with it, optionally checks for an EPUB/folder name
mismatch, and calls translator_gui.scan_html_folder. Errors are logged rather
than propagated. On exit it always clears self.qa_thread / self.qa_future and
emits self.thread_complete_signal.
"""
try:
# Extract cache configuration from qa_settings
cache_config = {
'enabled': qa_settings.get('cache_enabled', True),
'auto_size': qa_settings.get('cache_auto_size', False),
'show_stats': qa_settings.get('cache_show_stats', False),
'sizes': {}
}
# Get individual cache sizes
for cache_name in ['normalize_text', 'similarity_ratio', 'content_hashes',
'semantic_fingerprint', 'structural_signature', 'translation_artifacts']:
size = qa_settings.get(f'cache_{cache_name}', None)
if size is not None:
# Convert -1 to None for unlimited
cache_config['sizes'][cache_name] = None if size == -1 else size
# Configure the cache BEFORE calling scan_html_folder
from scan_html_folder import configure_qa_cache
configure_qa_cache(cache_config)
# Loop through all selected folders for bulk scanning
successful_scans = 0
failed_scans = 0
for i, current_folder in enumerate(folders_to_scan):
# Honor a stop request between folders
if self.stop_requested:
self.append_log(f"⚠️ Bulk scan stopped by user at folder {i+1}/{len(folders_to_scan)}")
break
folder_name = os.path.basename(current_folder)
if len(folders_to_scan) > 1:
self.append_log(f"\n📁 [{i+1}/{len(folders_to_scan)}] Scanning folder: {folder_name}")
# Determine the correct EPUB path for this specific folder
current_epub_path = epub_path
# Shallow per-folder copy so the flags changed below do not alter qa_settings itself
current_qa_settings = qa_settings.copy()
# For bulk scanning, try to find a matching EPUB for each folder
if len(folders_to_scan) > 1 and current_qa_settings.get('check_word_count_ratio', False):
# Try to find EPUB file matching this specific folder
folder_basename = os.path.basename(current_folder.rstrip('/\\'))
self.append_log(f" 🔍 Searching for EPUB matching folder: {folder_basename}")
# Look for EPUB in various locations
folder_parent = os.path.dirname(current_folder)
# Simple exact matching first, with minimal suffix handling
base_name = folder_basename
# Only handle the most common output suffixes
common_suffixes = ['_output', '_translated', '_en']
for suffix in common_suffixes:
if base_name.endswith(suffix):
base_name = base_name[:-len(suffix)]
break
# Simple EPUB search - focus on exact matching
# search_names is only used for the "Expected EPUB names" log message below
search_names = [folder_basename] # Start with exact folder name
if base_name != folder_basename: # Add base name only if different
search_names.append(base_name)
potential_epub_paths = [
# Most common locations in order of priority
os.path.join(folder_parent, f"{folder_basename}.epub"), # Same directory as output folder
os.path.join(folder_parent, f"{base_name}.epub"), # Same directory with base name
os.path.join(current_folder, f"{folder_basename}.epub"), # Inside the output folder
os.path.join(current_folder, f"{base_name}.epub"), # Inside with base name
]
# Find the first existing EPUB
folder_epub_path = None
for potential_path in potential_epub_paths:
if os.path.isfile(potential_path):
folder_epub_path = potential_path
if len(folders_to_scan) > 1:
self.append_log(f" Found matching EPUB: {os.path.basename(potential_path)}")
break
if folder_epub_path:
current_epub_path = folder_epub_path
if len(folders_to_scan) > 1: # Only log for bulk scans
self.append_log(f" 📖 Using EPUB: {os.path.basename(current_epub_path)}")
else:
# NO FALLBACK TO GLOBAL EPUB FOR BULK SCANS - This prevents wrong EPUB usage!
if len(folders_to_scan) > 1:
self.append_log(f" ⚠️ No matching EPUB found for folder '{folder_name}' - disabling word count analysis")
expected_names = ', '.join([f"{name}.epub" for name in search_names])
self.append_log(f" Expected EPUB names: {expected_names}")
current_epub_path = None
elif current_epub_path: # Single folder scan can use global EPUB
self.append_log(f" 📖 Using global EPUB: {os.path.basename(current_epub_path)} (no folder-specific EPUB found)")
else:
current_epub_path = None
# Disable word count analysis when no matching EPUB is found
if not current_epub_path:
current_qa_settings = current_qa_settings.copy()
current_qa_settings['check_word_count_ratio'] = False
# Check for EPUB/folder name mismatch
if current_epub_path and current_qa_settings.get('check_word_count_ratio', False) and current_qa_settings.get('warn_name_mismatch', True):
epub_name = os.path.splitext(os.path.basename(current_epub_path))[0]
folder_name_for_check = os.path.basename(current_folder.rstrip('/\\'))
if not check_epub_folder_match(epub_name, folder_name_for_check, current_qa_settings.get('custom_output_suffixes', '')):
if len(folders_to_scan) == 1:
# Interactive dialog for single folder scans
# NOTE(review): run_scan is handed to self.executor.submit or a threading.Thread by the
# enclosing code, so these dialogs appear to be created off the calling thread - confirm
# this is safe for Qt widgets.
msg = QMessageBox(self)
msg.setIcon(QMessageBox.Warning)
msg.setWindowTitle("EPUB/Folder Name Mismatch")
msg.setText(f"The source EPUB and output folder names don't match:\n\n"
f"📖 EPUB: {epub_name}\n"
f"📁 Folder: {folder_name_for_check}\n\n"
"This might mean you're comparing the wrong files.")
msg.setInformativeText("Would you like to:\n"
"• YES - Continue anyway (I'm sure these match)\n"
"• NO - Select a different EPUB file\n"
"• CANCEL - Cancel the scan")
msg.setStandardButtons(QMessageBox.Yes | QMessageBox.No | QMessageBox.Cancel)
result = msg.exec()
if result == QMessageBox.Cancel:
self.append_log("⚠️ QA scan canceled due to EPUB/folder mismatch.")
return
elif result == QMessageBox.No: # No - select different EPUB
# Determine if text file mode is enabled
text_file_mode = self.config.get('qa_text_file_mode', False)
if hasattr(self, 'qa_text_file_mode_checkbox'):
try:
text_file_mode = bool(self.qa_text_file_mode_checkbox.isChecked())
except Exception:
pass
if text_file_mode:
new_epub_path, _ = QFileDialog.getOpenFileName(
self,
"Select Different Source Text/PDF File",
"",
"Source files (*.txt *.pdf);;Text files (*.txt);;PDF files (*.pdf);;All files (*.*)"
)
else:
new_epub_path, _ = QFileDialog.getOpenFileName(
self,
"Select Different Source EPUB File",
"",
"EPUB files (*.epub);;All files (*.*)"
)
if new_epub_path:
# Remember the replacement both for this scan and in the saved config
current_epub_path = new_epub_path
self.selected_epub_path = new_epub_path
self.config['last_epub_path'] = new_epub_path
self.save_config(show_message=False)
self.append_log(f"✅ Updated EPUB: {os.path.basename(new_epub_path)}")
else:
proceed = QMessageBox.question(
self,
"No File Selected",
"No EPUB file was selected.\n\n" +
"Continue scan without word count analysis?",
QMessageBox.Yes | QMessageBox.No
)
if proceed == QMessageBox.No:
self.append_log("⚠️ QA scan canceled.")
return
else:
current_qa_settings = current_qa_settings.copy()
current_qa_settings['check_word_count_ratio'] = False
current_epub_path = None
self.append_log("ℹ️ Proceeding without word count analysis.")
# If YES, just continue with warning
else:
# For bulk scans, just warn and continue
self.append_log(f" ⚠️ Warning: EPUB/folder name mismatch - {epub_name} vs {folder_name_for_check}")
try:
# Determine selected_files for this folder
current_selected_files = None
if global_selected_files and len(folders_to_scan) == 1:
current_selected_files = global_selected_files
# Auto-detect PDF source file if not already set
# Check if the folder name matches a .pdf file in the parent directory
if not current_epub_path or not os.path.exists(current_epub_path):
folder_basename = os.path.basename(current_folder)
parent_dir = os.path.dirname(current_folder)
# Try to find a matching PDF file
potential_pdf = os.path.join(parent_dir, folder_basename + ".pdf")
if os.path.exists(potential_pdf):
current_epub_path = potential_pdf
self.append_log(f" 📄 Auto-detected PDF source: {os.path.basename(potential_pdf)}")
else:
# Also try without folder suffix if it has one
# NOTE: str.replace removes every "_output" occurrence in the name, not only a trailing one
potential_pdf_alt = os.path.join(parent_dir, folder_basename.replace("_output", "") + ".pdf")
if os.path.exists(potential_pdf_alt):
current_epub_path = potential_pdf_alt
self.append_log(f" 📄 Auto-detected PDF source: {os.path.basename(potential_pdf_alt)}")
# Pass the QA settings to scan_html_folder
# Don't pass text_file_mode explicitly - let scan_html_folder auto-detect from epub_path
# Get scan_html_folder from translator_gui's global scope
import translator_gui
scan_func = translator_gui.scan_html_folder
scan_func(
current_folder,
log=self.append_log,
stop_flag=lambda: self.stop_requested,
mode=mode,
qa_settings=current_qa_settings,
epub_path=current_epub_path,
selected_files=current_selected_files,
text_file_mode=None # Let it auto-detect from epub_path extension
)
successful_scans += 1
# Record last generated report path for quick access
report_path = os.path.join(current_folder, "validation_results.html")
if os.path.exists(report_path):
self.last_qa_report_path = report_path
if len(folders_to_scan) > 1:
self.append_log(f"✅ Folder '{folder_name}' scan completed successfully")
except Exception as folder_error:
failed_scans += 1
self.append_log(f"❌ Folder '{folder_name}' scan failed: {folder_error}")
if len(folders_to_scan) == 1:
# Re-raise for single folder scans
raise
# Final summary for bulk scans
if len(folders_to_scan) > 1:
self.append_log(f"\n📋 Bulk scan summary: {successful_scans} successful, {failed_scans} failed")
# If show_stats is enabled, log cache statistics
if qa_settings.get('cache_show_stats', False):
from scan_html_folder import get_cache_info
cache_stats = get_cache_info()
self.append_log("\n📊 Cache Performance Statistics:")
for name, info in cache_stats.items():
if info: # Check if info exists
# Report a 0 hit rate when there were no lookups, avoiding a division by zero
hit_rate = info.hits / (info.hits + info.misses) if (info.hits + info.misses) > 0 else 0
self.append_log(f" {name}: {info.hits} hits, {info.misses} misses ({hit_rate:.1%} hit rate)")
if len(folders_to_scan) == 1:
self.append_log("✅ QA scan completed successfully.")
else:
self.append_log("✅ Bulk QA scan completed.")
except Exception as e:
# Log the error and full traceback instead of propagating it
self.append_log(f"❌ QA scan error: {e}")
self.append_log(f"Traceback: {traceback.format_exc()}")
finally:
# Clear thread/future refs so buttons re-enable
self.qa_thread = None
if hasattr(self, 'qa_future'):
try:
self.qa_future = None
except Exception:
pass
# Emit signal to update button (thread-safe)
self.thread_complete_signal.emit()
# Run via shared executor
self._ensure_executor()
if self.executor:
self.qa_future = self.executor.submit(run_scan)
# Ensure UI is refreshed when QA work completes (button update handled by thread_complete_signal in finally block)
def _qa_done_callback(f):
    """Drop the stored QA future reference once the submitted work is done.

    Intended to be registered on the future returned by the executor;
    the future argument *f* itself is not inspected.
    """
    try:
        setattr(self, "qa_future", None)
    except Exception:
        # Best-effort cleanup only: a failure here must not propagate
        # out of the done-callback.
        pass
try:
self.qa_future.add_done_callback(_qa_done_callback)
except Exception:
pass
else:
self.qa_thread = threading.Thread(target=run_scan, daemon=True)
self.qa_thread.start()
# Update button IMMEDIATELY after starting thread (synchronous)
self.update_run_button()
def show_qa_scanner_settings(self, parent_dialog, qa_settings):
"""Show QA Scanner settings dialog"""
# Create settings dialog
dialog = QDialog(parent_dialog)
try:
self._qa_settings_dialog = dialog
dialog.finished.connect(lambda *_: setattr(self, "_qa_settings_dialog", None))
except Exception:
pass
# Apply basic dark stylesheet IMMEDIATELY to prevent white flash
# Set up icon path
icon_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'Halgakos.ico')
dialog.setStyleSheet("""
QDialog {
background-color: #2d2d2d;
color: white;
}
QGroupBox {
color: white;
border: 1px solid #555;
margin: 10px;
padding-top: 10px;
}
QGroupBox::title {
color: white;
left: 10px;
padding: 0 5px;
}
QLabel {
color: white;
}
QPushButton {
background-color: #404040;
color: white;
border: 1px solid #555;
padding: 5px;
}
QPushButton:hover {
background-color: #505050;
}
QComboBox {
background-color: #404040;
color: white;
border: 1px solid #555;
padding: 5px;
padding-right: 25px;
}
QComboBox:hover {
background-color: #505050;
border: 1px solid #777;
}
QComboBox::drop-down {
subcontrol-origin: padding;
subcontrol-position: top right;
width: 30px;
border-left: 1px solid #555;
}
QComboBox::down-arrow {
image: url(""" + icon_path.replace('\\', '/') + """);
width: 16px;
height: 16px;
}
QComboBox:on {
border: 1px solid #888;
}
QComboBox QAbstractItemView {
background-color: #404040;
color: white;
border: 1px solid #555;
selection-background-color: #505050;
}
""")
dialog.setWindowTitle("QA Scanner Settings")
dialog.setModal(True)
# Use screen ratios: 52% width, 85% height (decreased from 100%)
screen = QApplication.primaryScreen().geometry()
settings_width = int(screen.width() * 0.52)
settings_height = int(screen.height() * 0.85)
dialog.resize(settings_width, settings_height)
# Set window icon and prepare icon path for comboboxes
try:
base_dir = sys._MEIPASS if getattr(sys, 'frozen', False) else os.path.dirname(os.path.abspath(__file__))
icon_path = os.path.join(base_dir, 'Halgakos.ico')
if not os.path.exists(icon_path):
icon_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'Halgakos.ico')
if not os.path.exists(icon_path):
icon_path = os.path.join(os.getcwd(), 'Halgakos.ico')
if os.path.exists(icon_path):
dialog.setWindowIcon(QIcon(icon_path))
except Exception:
icon_path = os.path.join(os.getcwd(), 'Halgakos.ico')
# Main layout
main_layout = QVBoxLayout(dialog)
# Scroll area
scroll = QScrollArea()
scroll.setWidgetResizable(True)
scroll.setHorizontalScrollBarPolicy(Qt.ScrollBarAsNeeded)
scroll.setVerticalScrollBarPolicy(Qt.ScrollBarAsNeeded)
# Scrollable content widget
scroll_widget = QWidget()
scroll_widget.setObjectName('scroll_widget')
scroll_layout = QVBoxLayout(scroll_widget)
scroll_layout.setContentsMargins(30, 20, 30, 20)
scroll.setWidget(scroll_widget)
main_layout.addWidget(scroll)
# Helper function to disable mousewheel on spinboxes and comboboxes
def disable_wheel_event(widget):
    """Install a wheel handler on *widget* that only ignores the event.

    Used for spinboxes and comboboxes so that the mouse wheel does not
    change their value. The instance attribute ``wheelEvent`` is
    overwritten; nothing is returned.
    """
    def _ignore_wheel(event):
        # Hand back whatever ignore() returns, exactly like a lambda would.
        return event.ignore()

    widget.wheelEvent = _ignore_wheel
# Word count multiplier defaults (factory) - character-based ratios
base_multiplier_defaults = {
'english': 1.0, 'spanish': 1.10, 'french': 1.10, 'german': 1.05, 'italian': 1.05,
'portuguese': 1.10, 'russian': 1.15, 'arabic': 1.15, 'hindi': 1.10, 'turkish': 1.05,
'chinese': 2.50, 'chinese (simplified)': 2.50, 'chinese (traditional)': 2.50,
'japanese': 2.20, 'korean': 2.30, 'hebrew': 1.05, 'thai': 1.10,
'other': 1.0
}
# Merge current settings over factory defaults for initial display
wordcount_defaults = dict(base_multiplier_defaults)
user_mults = qa_settings.get('word_count_multipliers', {})
if isinstance(user_mults, dict):
wordcount_defaults.update(user_mults)
# Immutable factory defaults for reset
default_wordcount_defaults = dict(base_multiplier_defaults)
# Title
title_label = QLabel("QA Scanner Settings")
title_label.setFont(QFont('Arial', 24, QFont.Bold))
title_label.setAlignment(Qt.AlignCenter)
scroll_layout.addWidget(title_label)
scroll_layout.addSpacing(20)
# Foreign Character Settings Section
foreign_group = QGroupBox("Foreign Character Detection")
foreign_group.setFont(QFont('Arial', 12, QFont.Bold))
foreign_layout = QVBoxLayout(foreign_group)
foreign_layout.setContentsMargins(20, 15, 20, 15)
scroll_layout.addWidget(foreign_group)
# Source Language setting (for multiplier selection)
source_lang_widget = QWidget()
source_lang_layout = QHBoxLayout(source_lang_widget)
source_lang_layout.setContentsMargins(0, 0, 0, 6)
source_lang_label = QLabel("Source language (for word-count multiplier):")
source_lang_label.setFont(QFont('Arial', 10))
source_lang_layout.addWidget(source_lang_label)
source_language_options = [
'Auto', 'Chinese (Simplified)', 'Chinese (Traditional)',
'Japanese', 'Korean', 'English', 'Spanish', 'French', 'German',
'Italian', 'Portuguese', 'Russian', 'Arabic', 'Hindi', 'Turkish',
'Hebrew', 'Thai', 'Other'
]
source_lang_combo = QComboBox()
source_lang_combo.setEditable(True)
source_lang_combo.addItems(source_language_options)
source_lang_combo.setCurrentText(qa_settings.get('source_language', 'Auto').title())
source_lang_combo.setMinimumWidth(240)
disable_wheel_event(source_lang_combo)
source_lang_layout.addWidget(source_lang_combo)
source_lang_hint = QLabel("(Auto = detect via script/CJK heuristics)")
source_lang_hint.setFont(QFont('Arial', 9))
source_lang_hint.setStyleSheet("color: gray;")
source_lang_layout.addWidget(source_lang_hint)
source_lang_layout.addStretch()
foreign_layout.addWidget(source_lang_widget)
# Target Language setting
target_lang_widget = QWidget()
target_lang_layout = QHBoxLayout(target_lang_widget)
target_lang_layout.setContentsMargins(0, 0, 0, 10)
target_lang_label = QLabel("Target language:")
target_lang_label.setFont(QFont('Arial', 10))
target_lang_layout.addWidget(target_lang_label)
# Capitalize the stored value for display in combobox
stored_language = qa_settings.get('target_language', 'english')
display_language = stored_language.capitalize()
target_language_options = [
'English', 'Spanish', 'French', 'German', 'Italian', 'Portuguese',
'Russian', 'Arabic', 'Hindi', 'Chinese (Simplified)',
'Chinese (Traditional)', 'Japanese', 'Korean', 'Turkish',
'Hebrew', 'Thai'
]
target_language_combo = QComboBox()
target_language_combo.setEditable(True)
target_language_combo.addItems(target_language_options)
target_language_combo.setMinimumWidth(360)
target_language_combo.setMinimumContentsLength(24) # ensure popup and line edit stay wide
target_language_combo.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)
# Prefer the main GUI's target language for display if available
initial_display = self.config.get('output_language') or display_language
target_language_combo.setCurrentText(initial_display)
target_language_combo.setMinimumWidth(150)
disable_wheel_event(target_language_combo)
target_lang_layout.addWidget(target_language_combo)
target_lang_hint = QLabel("(characters from other scripts will be flagged)")
target_lang_hint.setFont(QFont('Arial', 9))
target_lang_hint.setStyleSheet("color: gray;")
target_lang_layout.addWidget(target_lang_hint)
target_lang_layout.addStretch()
foreign_layout.addWidget(target_lang_widget)
# Threshold setting
threshold_widget = QWidget()
threshold_layout = QHBoxLayout(threshold_widget)
threshold_layout.setContentsMargins(0, 10, 0, 10)
threshold_label = QLabel("Minimum foreign characters to flag:")
threshold_label.setFont(QFont('Arial', 10))
threshold_layout.addWidget(threshold_label)
threshold_spinbox = QSpinBox()
threshold_spinbox.setMinimum(0)
threshold_spinbox.setMaximum(1000)
threshold_spinbox.setValue(qa_settings.get('foreign_char_threshold', 10))
threshold_spinbox.setMinimumWidth(100)
disable_wheel_event(threshold_spinbox)
threshold_layout.addWidget(threshold_spinbox)
threshold_hint = QLabel("(0 = always flag, higher = more tolerant)")
threshold_hint.setFont(QFont('Arial', 9))
threshold_hint.setStyleSheet("color: gray;")
threshold_layout.addWidget(threshold_hint)
threshold_layout.addStretch()
foreign_layout.addWidget(threshold_widget)
# Excluded characters
excluded_label = QLabel("Additional characters to exclude from detection:")
excluded_label.setFont(QFont('Arial', 10))
foreign_layout.addWidget(excluded_label)
# Text edit for excluded characters
excluded_text = QTextEdit()
excluded_text.setMaximumHeight(150)
excluded_text.setFont(QFont('Consolas', 10))
excluded_text.setPlainText(qa_settings.get('excluded_characters', ''))
foreign_layout.addWidget(excluded_text)
excluded_hint = QLabel("Enter characters separated by spaces (e.g., ™ © ® • …)")
excluded_hint.setFont(QFont('Arial', 9))
excluded_hint.setStyleSheet("color: gray;")
foreign_layout.addWidget(excluded_hint)
scroll_layout.addSpacing(20)
# Detection Options Section
detection_group = QGroupBox("Detection Options")
detection_group.setFont(QFont('Arial', 12, QFont.Bold))
detection_layout = QVBoxLayout(detection_group)
detection_layout.setContentsMargins(20, 15, 20, 15)
scroll_layout.addWidget(detection_group)
# Checkboxes for detection options
check_encoding_checkbox = self._create_styled_checkbox("Check for encoding issues (�, □, ◇)")
check_encoding_checkbox.setChecked(qa_settings.get('check_encoding_issues', False))
detection_layout.addWidget(check_encoding_checkbox)
check_repetition_checkbox = self._create_styled_checkbox("Check for excessive repetition")
check_repetition_checkbox.setChecked(qa_settings.get('check_repetition', True))
detection_layout.addWidget(check_repetition_checkbox)
check_artifacts_checkbox = self._create_styled_checkbox("Check for translation artifacts (MTL notes, watermarks)")
check_artifacts_checkbox.setChecked(qa_settings.get('check_translation_artifacts', False))
detection_layout.addWidget(check_artifacts_checkbox)
# Separate toggle for AI artifacts
check_ai_artifacts_checkbox = self._create_styled_checkbox("Check for AI artifacts (\"Sure, here’s…\", thinking tags, JSON)")
check_ai_artifacts_checkbox.setChecked(qa_settings.get('check_ai_artifacts', False))
check_ai_artifacts_checkbox.setContentsMargins(20, 0, 0, 0)
detection_layout.addWidget(check_ai_artifacts_checkbox)
check_punctuation_checkbox = self._create_styled_checkbox("Check ?! punctuation mismatches (compares with source file)")
check_punctuation_checkbox.setChecked(qa_settings.get('check_punctuation_mismatch', False))
detection_layout.addWidget(check_punctuation_checkbox)
# Punctuation loss threshold setting (indented under the checkbox)
punct_threshold_widget = QWidget()
punct_threshold_layout = QHBoxLayout(punct_threshold_widget)
punct_threshold_layout.setContentsMargins(20, 0, 0, 10)
punct_threshold_label = QLabel("Flag if lost >")
punct_threshold_label.setFont(QFont('Arial', 10))
punct_threshold_layout.addWidget(punct_threshold_label)
punct_threshold_spinbox = QSpinBox()
punct_threshold_spinbox.setMinimum(0)
punct_threshold_spinbox.setMaximum(100)
punct_threshold_spinbox.setValue(qa_settings.get('punctuation_loss_threshold', 49))
punct_threshold_spinbox.setSuffix("%")
punct_threshold_spinbox.setMinimumWidth(80)
disable_wheel_event(punct_threshold_spinbox)
punct_threshold_layout.addWidget(punct_threshold_spinbox)
punct_threshold_hint = QLabel("(0 = flag all, 49 = flag if half lost, 100 = only flag if all lost)")
punct_threshold_hint.setFont(QFont('Arial', 9))
punct_threshold_hint.setStyleSheet("color: gray;")
punct_threshold_layout.addWidget(punct_threshold_hint)
punct_threshold_layout.addStretch()
detection_layout.addWidget(punct_threshold_widget)
# Enable/disable punctuation threshold controls based on checkbox
def toggle_punctuation_threshold(checked):
    """Enable or disable the punctuation-loss threshold row.

    *checked* is the state of the punctuation mismatch checkbox. The
    label, spinbox and hint are enabled/disabled together and recoloured
    so the disabled state is visibly dimmed.
    """
    threshold_row = (
        punct_threshold_label,
        punct_threshold_spinbox,
        punct_threshold_hint,
    )
    for control in threshold_row:
        control.setEnabled(checked)

    # One stylesheet per control, in the same order as threshold_row.
    if checked:
        row_styles = ("color: white;", "color: white;", "color: gray;")
    else:
        row_styles = ("color: #606060;", "color: #909090;", "color: #404040;")
    for control, css in zip(threshold_row, row_styles):
        control.setStyleSheet(css)
check_punctuation_checkbox.toggled.connect(toggle_punctuation_threshold)
toggle_punctuation_threshold(check_punctuation_checkbox.isChecked()) # Set initial state
# Excess punctuation checkbox (indented under the punctuation checker)
excess_punct_widget = QWidget()
excess_punct_layout = QHBoxLayout(excess_punct_widget)
excess_punct_layout.setContentsMargins(20, 0, 0, 0)
excess_punct_checkbox = self._create_styled_checkbox("Flag excess punctuation (more ? or ! than source)")
excess_punct_checkbox.setChecked(qa_settings.get('flag_excess_punctuation', False))
excess_punct_layout.addWidget(excess_punct_checkbox)
excess_punct_layout.addStretch()
detection_layout.addWidget(excess_punct_widget)
# Excess punctuation threshold setting (indented under the excess checkbox)
excess_threshold_widget = QWidget()
excess_threshold_layout = QHBoxLayout(excess_threshold_widget)
excess_threshold_layout.setContentsMargins(40, 0, 0, 10)
excess_threshold_label = QLabel("Flag if excess >")
excess_threshold_label.setFont(QFont('Arial', 10))
excess_threshold_layout.addWidget(excess_threshold_label)
excess_threshold_spinbox = QSpinBox()
excess_threshold_spinbox.setMinimum(0)
excess_threshold_spinbox.setMaximum(500)
excess_threshold_spinbox.setValue(qa_settings.get('excess_punctuation_threshold', 49))
excess_threshold_spinbox.setSuffix("%")
excess_threshold_spinbox.setMinimumWidth(80)
disable_wheel_event(excess_threshold_spinbox)
excess_threshold_layout.addWidget(excess_threshold_spinbox)
excess_threshold_hint = QLabel("(0 = flag all excess, 49 = flag if half excess, 100 = only flag if doubled)")
excess_threshold_hint.setFont(QFont('Arial', 9))
excess_threshold_hint.setStyleSheet("color: gray;")
excess_threshold_layout.addWidget(excess_threshold_hint)
excess_threshold_layout.addStretch()
detection_layout.addWidget(excess_threshold_widget)
# Enable/disable excess punctuation controls based on main and excess checkboxes
def toggle_excess_punct(main_checked):
    """Refresh the excess-punctuation checkbox and its threshold row.

    *main_checked* is the state of the main punctuation mismatch
    checkbox. The excess checkbox follows that state directly; the
    threshold label/spinbox/hint are only enabled when the main checkbox
    and the excess checkbox are both checked.
    """
    excess_enabled = main_checked
    excess_punct_checkbox.setEnabled(excess_enabled)

    # Threshold row requires both the main and the excess checkbox.
    threshold_enabled = main_checked and excess_punct_checkbox.isChecked()
    threshold_row = (
        excess_threshold_label,
        excess_threshold_spinbox,
        excess_threshold_hint,
    )
    for control in threshold_row:
        control.setEnabled(threshold_enabled)

    excess_punct_checkbox.setStyleSheet(
        "color: white;" if excess_enabled else "color: #606060;"
    )

    # One stylesheet per control, in the same order as threshold_row.
    if threshold_enabled:
        row_styles = ("color: white;", "color: white;", "color: gray;")
    else:
        row_styles = ("color: #606060;", "color: #909090;", "color: #404040;")
    for control, css in zip(threshold_row, row_styles):
        control.setStyleSheet(css)
def toggle_excess_threshold(excess_checked):
    """Re-run the excess-punctuation refresh when the excess checkbox toggles.

    The emitted *excess_checked* value is not used here; the refresh is
    driven from the current state of the main punctuation checkbox.
    """
    main_state = check_punctuation_checkbox.isChecked()
    toggle_excess_punct(main_state)
check_punctuation_checkbox.toggled.connect(toggle_excess_punct)
excess_punct_checkbox.toggled.connect(toggle_excess_threshold)
toggle_excess_punct(check_punctuation_checkbox.isChecked()) # Set initial state
check_glossary_checkbox = self._create_styled_checkbox("Check for glossary leakage (raw glossary entries in translation)")
check_glossary_checkbox.setChecked(qa_settings.get('check_glossary_leakage', True))
detection_layout.addWidget(check_glossary_checkbox)
scroll_layout.addSpacing(20)
# File Processing Section
file_group = QGroupBox("File Processing")
file_group.setFont(QFont('Arial', 12, QFont.Bold))
file_layout = QVBoxLayout(file_group)
file_layout.setContentsMargins(20, 15, 20, 15)
scroll_layout.addWidget(file_group)
# Minimum file length
min_length_widget = QWidget()
min_length_layout = QHBoxLayout(min_length_widget)
min_length_layout.setContentsMargins(0, 0, 0, 10)
min_length_label = QLabel("Minimum file length (characters):")
min_length_label.setFont(QFont('Arial', 10))
min_length_layout.addWidget(min_length_label)
min_length_spinbox = QSpinBox()
min_length_spinbox.setMinimum(0)
min_length_spinbox.setMaximum(10000)
min_length_spinbox.setValue(qa_settings.get('min_file_length', 0))
min_length_spinbox.setMinimumWidth(100)
disable_wheel_event(min_length_spinbox)
min_length_layout.addWidget(min_length_spinbox)
min_length_layout.addStretch()
file_layout.addWidget(min_length_widget)
# Minimum duplicate word count
min_dup_words_widget = QWidget()
min_dup_words_layout = QHBoxLayout(min_dup_words_widget)
min_dup_words_layout.setContentsMargins(0, 10, 0, 10)
min_dup_words_label = QLabel("Skip small files as duplicates if <N words:")
min_dup_words_label.setFont(QFont('Arial', 10))
min_dup_words_layout.addWidget(min_dup_words_label)
min_dup_words_spinbox = QSpinBox()
min_dup_words_spinbox.setMinimum(0)
min_dup_words_spinbox.setMaximum(999999)
min_dup_words_spinbox.setSingleStep(50)
min_dup_words_spinbox.setValue(qa_settings.get('min_duplicate_word_count', 500))
min_dup_words_spinbox.setMinimumWidth(100)
disable_wheel_event(min_dup_words_spinbox)
min_dup_words_layout.addWidget(min_dup_words_spinbox)
min_dup_hint = QLabel("(prevents section/notice files from being flagged)")
min_dup_hint.setFont(QFont('Arial', 9))
min_dup_hint.setStyleSheet("color: gray;")
min_dup_words_layout.addWidget(min_dup_hint)
min_dup_words_layout.addStretch()
file_layout.addWidget(min_dup_words_widget)
# Minimum text length for spacing/linebreaks check
min_spacing_text_widget = QWidget()
min_spacing_text_layout = QHBoxLayout(min_spacing_text_widget)
min_spacing_text_layout.setContentsMargins(0, 10, 0, 10)
min_spacing_text_label = QLabel("Minimum text length for spacing check (characters):")
min_spacing_text_label.setFont(QFont('Arial', 10))
min_spacing_text_layout.addWidget(min_spacing_text_label)
min_spacing_text_spinbox = QSpinBox()
min_spacing_text_spinbox.setMinimum(0)
min_spacing_text_spinbox.setMaximum(999999)
min_spacing_text_spinbox.setSingleStep(10)
min_spacing_text_spinbox.setValue(qa_settings.get('min_text_length_for_spacing', 100))
min_spacing_text_spinbox.setMinimumWidth(100)
disable_wheel_event(min_spacing_text_spinbox)
min_spacing_text_layout.addWidget(min_spacing_text_spinbox)
min_spacing_hint = QLabel("(skips files with very little content like cover pages)")
min_spacing_hint.setFont(QFont('Arial', 9))
min_spacing_hint.setStyleSheet("color: gray;")
min_spacing_text_layout.addWidget(min_spacing_hint)
min_spacing_text_layout.addStretch()
file_layout.addWidget(min_spacing_text_widget)
scroll_layout.addSpacing(15)
# Word Count Cross-Reference Section
wordcount_group = QGroupBox("Word Count Analysis")
wordcount_group.setFont(QFont('Arial', 12, QFont.Bold))
wordcount_layout = QVBoxLayout(wordcount_group)
wordcount_layout.setContentsMargins(20, 15, 20, 15)
scroll_layout.addWidget(wordcount_group)
check_word_count_checkbox = self._create_styled_checkbox("Cross-reference word counts with original source file")
check_word_count_checkbox.setChecked(qa_settings.get('check_word_count_ratio', True))
wordcount_layout.addWidget(check_word_count_checkbox)
wordcount_desc = QLabel("Compares word counts between original and translated files to detect missing content.\n" +
"For EPUB: accounts for typical expansion ratios when translating from CJK to English.\n" +
"For Text: compares each section file against the total source .txt word count.")
wordcount_desc.setFont(QFont('Arial', 9))
wordcount_desc.setStyleSheet("color: gray;")
wordcount_desc.setWordWrap(True)
wordcount_desc.setMaximumWidth(700)
wordcount_layout.addWidget(wordcount_desc)
# Counting mode options
counting_mode_widget = QWidget()
counting_mode_layout = QHBoxLayout(counting_mode_widget)
counting_mode_layout.setContentsMargins(0, 10, 0, 5)
counting_mode_label = QLabel("Counting mode:")
counting_mode_label.setFont(QFont('Arial', 10))
counting_mode_layout.addWidget(counting_mode_label)
# Get current mode from saved settings (default: exact)
saved_counting_mode = qa_settings.get('counting_mode', 'exact')
counting_mode_combo = QComboBox()
counting_mode_combo.addItem("Character count (sampled) - Fastest", "sampled")
counting_mode_combo.addItem("Character count (exact) - Default", "exact")
counting_mode_combo.addItem("Word count (legacy)", "word")
counting_mode_combo.setMinimumWidth(250)
counting_mode_combo.wheelEvent = lambda event: event.ignore()
# Set current selection based on saved settings
if saved_counting_mode == 'word':
counting_mode_combo.setCurrentIndex(2) # Word count
elif saved_counting_mode == 'sampled':
counting_mode_combo.setCurrentIndex(0) # Sampled
else:
counting_mode_combo.setCurrentIndex(1) # Exact (default)
counting_mode_layout.addWidget(counting_mode_combo)
counting_mode_layout.addStretch()
wordcount_layout.addWidget(counting_mode_widget)
# Word count multiplier sliders (2-column grid)
multipliers_label = QLabel("Expected translation length multiplier (translated words ÷ source words)")
multipliers_label.setFont(QFont('Arial', 10, QFont.Bold))
wordcount_layout.addWidget(multipliers_label)
multiplier_hint = QLabel("Adjust per-language expansion. 100% = same length as source; 150% = 1.5x longer.")
multiplier_hint.setFont(QFont('Arial', 9))
multiplier_hint.setStyleSheet("color: gray;")
multiplier_hint.setWordWrap(True)
multiplier_hint.setMaximumWidth(700)
wordcount_layout.addWidget(multiplier_hint)
# Auto toggle for using default multipliers
auto_multipliers_widget = QWidget()
auto_multipliers_layout = QHBoxLayout(auto_multipliers_widget)
auto_multipliers_layout.setContentsMargins(0, 10, 0, 10)
auto_multipliers_checkbox = self._create_styled_checkbox("Auto: Use recommended default multipliers")
auto_multipliers_checkbox.setChecked(qa_settings.get('use_auto_multipliers', True)) # Enabled by default
auto_multipliers_layout.addWidget(auto_multipliers_checkbox)
auto_multipliers_hint = QLabel("(disable to customize per-language ratios)")
auto_multipliers_hint.setFont(QFont('Arial', 9))
auto_multipliers_hint.setStyleSheet("color: gray;")
auto_multipliers_layout.addWidget(auto_multipliers_hint)
auto_multipliers_layout.addStretch()
wordcount_layout.addWidget(auto_multipliers_widget)
multiplier_grid_widget = QWidget()
multiplier_grid = QGridLayout(multiplier_grid_widget)
multiplier_grid.setContentsMargins(0, 6, 0, 6)
multiplier_grid.setHorizontalSpacing(16)
multiplier_grid.setVerticalSpacing(8)
wordcount_layout.addWidget(multiplier_grid_widget)
# Keep slider refs for saving and enabling/disabling
word_multiplier_sliders = {}
word_multiplier_labels = []
# Ordered language list for stable UI
multiplier_order = [
'english', 'spanish', 'french', 'german', 'italian', 'portuguese',
'russian', 'arabic', 'hindi', 'turkish',
'chinese', 'chinese (simplified)', 'chinese (traditional)',
'japanese', 'korean', 'hebrew', 'thai', 'other'
]
# Build sliders in 2 columns
for idx, lang_key in enumerate(multiplier_order):
row = idx // 2
col = idx % 2
row_widget = QWidget()
row_layout = QHBoxLayout(row_widget)
row_layout.setContentsMargins(0, 0, 0, 0)
display_name = lang_key.capitalize() if '(' not in lang_key else lang_key.title()
lang_label = QLabel(display_name + ":")
lang_label.setFont(QFont('Arial', 9))
row_layout.addWidget(lang_label)
# Slider
slider = QSlider(Qt.Horizontal)
slider.setMinimum(10) # 0.10x
slider.setMaximum(1000) # 10.0x
slider.setSingleStep(5)
slider.setTickInterval(50)
slider.setMinimumWidth(140)
slider.wheelEvent = lambda event: event.ignore()
current_mult = wordcount_defaults.get(lang_key, 1.0)
slider.setValue(int(current_mult * 100))
row_layout.addWidget(slider)
# Editable spinbox (same range, %)
spin = QSpinBox()
spin.setMinimum(10)
spin.setMaximum(1000)
spin.setSingleStep(1)
spin.setValue(int(current_mult * 100))
spin.setSuffix("%")
spin.setMinimumWidth(70)
spin.wheelEvent = lambda event: event.ignore()
row_layout.addWidget(spin)
# Keep in sync both ways
slider.valueChanged.connect(spin.setValue)
spin.valueChanged.connect(slider.setValue)
word_multiplier_sliders[lang_key] = slider
word_multiplier_sliders[f"{lang_key}__spin"] = spin
word_multiplier_labels.append(lang_label)
multiplier_grid.addWidget(row_widget, row, col)
# Function to toggle multiplier controls based on auto checkbox
def toggle_multiplier_controls(auto_enabled):
    """Lock or unlock the per-language multiplier controls.

    When *auto_enabled* is true the sliders and spinboxes are disabled
    and the spinboxes/labels are dimmed; otherwise they are enabled and
    shown with the normal styling.
    """
    editable = not auto_enabled
    if auto_enabled:
        spin_style = "background-color: #303030; color: #808080;"
        label_style = "color: #808080;"
    else:
        spin_style = "background-color: #404040; color: white;"
        label_style = "color: white;"

    for language in multiplier_order:
        # Sliders are stored under the language key, spinboxes under
        # "<language>__spin" in the same dict.
        slider_widget = word_multiplier_sliders.get(language)
        spin_widget = word_multiplier_sliders.get(f"{language}__spin")
        if slider_widget:
            slider_widget.setEnabled(editable)
        if spin_widget:
            spin_widget.setEnabled(editable)
            spin_widget.setStyleSheet(spin_style)

    for caption in word_multiplier_labels:
        caption.setStyleSheet(label_style)
# Connect auto checkbox to toggle function
auto_multipliers_checkbox.toggled.connect(toggle_multiplier_controls)
# Set initial state
toggle_multiplier_controls(auto_multipliers_checkbox.isChecked())
wordcount_layout.addSpacing(6)
# Show current EPUB status and allow selection
epub_widget = QWidget()
epub_layout = QHBoxLayout(epub_widget)
epub_layout.setContentsMargins(0, 10, 0, 5)
# Get source files (EPUB, TXT, PDF, or MD) from actual current selection
current_epub_files = []
if hasattr(self, 'selected_files') and self.selected_files:
current_epub_files = [
f for f in self.selected_files
if f.lower().endswith(('.epub', '.txt', '.pdf', '.md'))
]
if len(current_epub_files) > 1:
# Multiple source files in current selection
primary_file = os.path.basename(current_epub_files[0])
status_text = f"📖 {len(current_epub_files)} source files selected (Primary: {primary_file})"
status_color = 'green'
elif len(current_epub_files) == 1:
# Single source file in current selection
file_name = os.path.basename(current_epub_files[0])
lower_name = current_epub_files[0].lower()
if lower_name.endswith('.txt'):
file_type = "TXT"
elif lower_name.endswith('.pdf'):
file_type = "PDF"
elif lower_name.endswith('.md'):
file_type = "MD"
else:
file_type = "EPUB"
status_text = f"📖 Current {file_type}: {file_name}"
status_color = 'green'
else:
# No source files in current selection
status_text = "📖 No EPUB/TXT/PDF/MD in current selection"
status_color = 'orange'
status_label = QLabel(status_text)
status_label.setFont(QFont('Arial', 10))
status_label.setStyleSheet(f"color: {status_color};")
epub_layout.addWidget(status_label)
def select_epub_for_qa():
    """Let the user pick the source file (EPUB, TXT, PDF or MD) used for QA.

    On a successful pick the path is stored on ``self``, written to the
    config under ``last_epub_path`` and saved, the status label is
    updated, and a log line is emitted. Nothing happens if the dialog
    returns an empty path.
    """
    chosen_path, _ = QFileDialog.getOpenFileName(
        dialog,
        "Select Source File",
        "",
        "Source files (*.epub *.txt *.pdf *.md);;EPUB files (*.epub);;Text files (*.txt);;PDF files (*.pdf);;Markdown files (*.md);;All files (*.*)"
    )
    if not chosen_path:
        return

    self.selected_epub_path = chosen_path
    self.config['last_epub_path'] = chosen_path
    self.save_config(show_message=False)

    # A manual single-file pick replaces any multi-file tracking list.
    if hasattr(self, 'selected_epub_files'):
        self.selected_epub_files = [chosen_path]

    # First matching suffix wins; anything else is labelled as EPUB.
    lowered = chosen_path.lower()
    known_suffixes = (('.txt', "TXT"), ('.pdf', "PDF"), ('.md', "MD"))
    file_type = next(
        (label for suffix, label in known_suffixes if lowered.endswith(suffix)),
        "EPUB",
    )

    shown_name = os.path.basename(chosen_path)
    status_label.setText(f"📖 Current {file_type}: {shown_name}")
    status_label.setStyleSheet("color: green;")
    self.append_log(f"✅ Selected {file_type} for QA: {shown_name}")
select_epub_btn = QPushButton("Select Source File")
select_epub_btn.setFont(QFont('Arial', 9))
select_epub_btn.clicked.connect(select_epub_for_qa)
epub_layout.addWidget(select_epub_btn)
epub_layout.addStretch()
wordcount_layout.addWidget(epub_widget)
# Add option to disable mismatch warning
warn_mismatch_checkbox = self._create_styled_checkbox("Warn when EPUB and folder names don't match")
warn_mismatch_checkbox.setChecked(qa_settings.get('warn_name_mismatch', True))
wordcount_layout.addWidget(warn_mismatch_checkbox)
wordcount_layout.addSpacing(10)
# Missing images check (requires source file like word count does)
check_missing_images_checkbox = self._create_styled_checkbox("Check for missing image tags (images lost during translation)")
check_missing_images_checkbox.setChecked(qa_settings.get('check_missing_images', True))
wordcount_layout.addWidget(check_missing_images_checkbox)
images_desc = QLabel("Compares image tags between original and translated HTML files.\n" +
"Detects when <img> tags are lost during translation process.")
images_desc.setFont(QFont('Arial', 9))
images_desc.setStyleSheet("color: gray;")
images_desc.setWordWrap(True)
images_desc.setMaximumWidth(700)
wordcount_layout.addWidget(images_desc)
scroll_layout.addSpacing(20)
# Additional Checks Section
additional_group = QGroupBox("Additional Checks")
additional_group.setFont(QFont('Arial', 12, QFont.Bold))
additional_layout = QVBoxLayout(additional_group)
additional_layout.setContentsMargins(20, 15, 20, 15)
scroll_layout.addWidget(additional_group)
# Multiple headers check
check_multiple_headers_checkbox = self._create_styled_checkbox("Detect files with 2 or more headers (h1-h6 tags)")
check_multiple_headers_checkbox.setChecked(qa_settings.get('check_multiple_headers', True))
additional_layout.addWidget(check_multiple_headers_checkbox)
headers_desc = QLabel("Identifies files that may have been incorrectly split or merged.\n" +
"Useful for detecting chapters that contain multiple sections.")
headers_desc.setFont(QFont('Arial', 9))
headers_desc.setStyleSheet("color: gray;")
headers_desc.setWordWrap(True)
headers_desc.setMaximumWidth(700)
additional_layout.addWidget(headers_desc)
additional_layout.addSpacing(10)
# Missing HTML tag check
check_missing_html_tag_checkbox = self._create_styled_checkbox("Check HTML structure and tag consistency")
check_missing_html_tag_checkbox.setChecked(qa_settings.get('check_missing_html_tag', True))
additional_layout.addWidget(check_missing_html_tag_checkbox)
# Body tag check (separate, disabled by default)
body_tag_widget = QWidget()
body_tag_layout = QHBoxLayout(body_tag_widget)
body_tag_layout.setContentsMargins(0, 0, 0, 5)
check_body_tag_checkbox = self._create_styled_checkbox("Check for <body> tag consistency")
check_body_tag_checkbox.setChecked(qa_settings.get('check_body_tag', False))
body_tag_layout.addWidget(check_body_tag_checkbox)
body_tag_hint = QLabel("(Disabled by default - body tags not required in EPUBs)")
body_tag_hint.setFont(QFont('Arial', 9))
body_tag_hint.setStyleSheet("color: gray;")
body_tag_layout.addWidget(body_tag_hint)
body_tag_layout.addStretch()
additional_layout.addWidget(body_tag_widget)
# Missing header tags check
check_missing_header_tags_checkbox = self._create_styled_checkbox("Flag HTML files with no heading tags (h1-h6)")
check_missing_header_tags_checkbox.setChecked(qa_settings.get('check_missing_header_tags', True))
additional_layout.addWidget(check_missing_header_tags_checkbox)
# Invalid nesting check (separate toggle)
check_invalid_nesting_checkbox = self._create_styled_checkbox("Check for invalid tag nesting")
check_invalid_nesting_checkbox.setChecked(qa_settings.get('check_invalid_nesting', False))
additional_layout.addWidget(check_invalid_nesting_checkbox)
additional_layout.addSpacing(15)
# NEW: Paragraph Structure Check
# Separator line
separator_line = QFrame()
separator_line.setFrameShape(QFrame.HLine)
separator_line.setFrameShadow(QFrame.Sunken)
additional_layout.addWidget(separator_line)
additional_layout.addSpacing(10)
# Checkbox for paragraph structure check
check_paragraph_structure_checkbox = self._create_styled_checkbox("Check for insufficient paragraph tags")
check_paragraph_structure_checkbox.setChecked(qa_settings.get('check_paragraph_structure', True))
additional_layout.addWidget(check_paragraph_structure_checkbox)
# Threshold setting frame
threshold_widget = QWidget()
threshold_layout = QHBoxLayout(threshold_widget)
threshold_layout.setContentsMargins(20, 10, 0, 5)
threshold_label = QLabel("Minimum text in <p> tags:")
threshold_label.setFont(QFont('Arial', 10))
threshold_layout.addWidget(threshold_label)
# Get current threshold value (default 30%)
current_threshold = int(qa_settings.get('paragraph_threshold', 0.3) * 100)
# Spinbox for threshold
paragraph_threshold_spinbox = QSpinBox()
paragraph_threshold_spinbox.setMinimum(0)
paragraph_threshold_spinbox.setMaximum(100)
paragraph_threshold_spinbox.setValue(current_threshold)
paragraph_threshold_spinbox.setMinimumWidth(80)
disable_wheel_event(paragraph_threshold_spinbox)
threshold_layout.addWidget(paragraph_threshold_spinbox)
percent_label = QLabel("%")
percent_label.setFont(QFont('Arial', 10))
threshold_layout.addWidget(percent_label)
# Threshold value label
threshold_value_label = QLabel(f"(currently {current_threshold}%)")
threshold_value_label.setFont(QFont('Arial', 9))
threshold_value_label.setStyleSheet("color: gray;")
threshold_layout.addWidget(threshold_value_label)
threshold_layout.addStretch()
additional_layout.addWidget(threshold_widget)
# Update label when spinbox changes
def update_threshold_label(value):
    """Show the spinbox's new percentage in the gray "(currently N%)" label.

    Connected to ``paragraph_threshold_spinbox.valueChanged``; ``value`` is
    the integer emitted by that signal.
    """
    label_text = f"(currently {value}%)"
    threshold_value_label.setText(label_text)
paragraph_threshold_spinbox.valueChanged.connect(update_threshold_label)
# Description
para_desc = QLabel("Detects HTML files where text content is not properly wrapped in paragraph tags.\n" +
"Files with less than the specified percentage of text in <p> tags will be flagged.\n" +
"Also checks for large blocks of unwrapped text directly in the body element.")
para_desc.setFont(QFont('Arial', 9))
para_desc.setStyleSheet("color: gray;")
para_desc.setWordWrap(True)
para_desc.setMaximumWidth(700)
para_desc.setContentsMargins(20, 5, 0, 0)
additional_layout.addWidget(para_desc)
# Enable/disable threshold setting based on checkbox
def toggle_paragraph_threshold(checked):
    """Enable or disable the whole threshold row.

    ``checked`` is the state of the paragraph-structure checkbox; the
    spinbox and its three companion labels all follow it.
    """
    threshold_row_controls = (
        paragraph_threshold_spinbox,
        threshold_label,
        percent_label,
        threshold_value_label,
    )
    for row_control in threshold_row_controls:
        row_control.setEnabled(checked)
check_paragraph_structure_checkbox.toggled.connect(toggle_paragraph_threshold)
toggle_paragraph_threshold(check_paragraph_structure_checkbox.isChecked()) # Set initial state
scroll_layout.addSpacing(20)
# Report Settings Section
report_group = QGroupBox("Report Settings")
report_group.setFont(QFont('Arial', 12, QFont.Bold))
report_layout = QVBoxLayout(report_group)
report_layout.setContentsMargins(20, 15, 20, 15)
scroll_layout.addWidget(report_group)
# Report format
format_widget = QWidget()
format_layout = QHBoxLayout(format_widget)
format_layout.setContentsMargins(0, 0, 0, 10)
format_label = QLabel("Report format:")
format_label.setFont(QFont('Arial', 10))
format_layout.addWidget(format_label)
current_format_value = qa_settings.get('report_format', 'detailed')
format_options = [
("Summary only", "summary"),
("Detailed (recommended)", "detailed"),
("Verbose (all data)", "verbose")
]
# Create radio buttons for format options
format_radio_buttons = []
for idx, (text, value) in enumerate(format_options):
rb = self._create_styled_radio_button(text)
if value == current_format_value:
rb.setChecked(True)
format_layout.addWidget(rb)
format_radio_buttons.append((rb, value))
format_layout.addStretch()
report_layout.addWidget(format_widget)
# Auto-save report
auto_save_checkbox = self._create_styled_checkbox("Automatically save report after scan")
auto_save_checkbox.setChecked(qa_settings.get('auto_save_report', True))
report_layout.addWidget(auto_save_checkbox)
# Add word count ratio threshold settings
# Min/Max normalized ratio thresholds
ratio_thresholds_widget = QWidget()
ratio_thresholds_layout = QHBoxLayout(ratio_thresholds_widget)
ratio_thresholds_layout.setContentsMargins(0, 10, 0, 5)
ratio_min_label = QLabel("Min Ratio (normalized):")
ratio_min_label.setFont(QFont('Arial', 10))
ratio_thresholds_layout.addWidget(ratio_min_label)
# Min ratio selector (editable combo box: "Auto" or a numeric value)
ratio_min_spin = QComboBox()
ratio_min_spin.setEditable(True)
ratio_min_spin.addItem("Auto")
# Add reasonable range options
for val in [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:
ratio_min_spin.addItem(str(val))
saved_min = qa_settings.get('word_count_min_ratio', 'Auto')
ratio_min_spin.setCurrentText(str(saved_min))
ratio_min_spin.setMinimumWidth(100) # Increased from 80
disable_wheel_event(ratio_min_spin)
ratio_thresholds_layout.addWidget(ratio_min_spin)
ratio_thresholds_layout.addSpacing(20)
ratio_max_label = QLabel("Max Ratio (normalized):")
ratio_max_label.setFont(QFont('Arial', 10))
ratio_thresholds_layout.addWidget(ratio_max_label)
# Max ratio selector (editable combo box: "Auto" or a numeric value)
ratio_max_spin = QComboBox()
ratio_max_spin.setEditable(True)
ratio_max_spin.addItem("Auto")
# Add reasonable range options
for val in [1.2, 1.5, 1.8, 2.0, 2.2, 2.5, 3.0, 4.0, 5.0]:
ratio_max_spin.addItem(str(val))
saved_max = qa_settings.get('word_count_max_ratio', 'Auto')
ratio_max_spin.setCurrentText(str(saved_max))
ratio_max_spin.setMinimumWidth(100) # Increased from 80
disable_wheel_event(ratio_max_spin)
ratio_thresholds_layout.addWidget(ratio_max_spin)
ratio_thresholds_layout.addStretch()
wordcount_layout.addWidget(ratio_thresholds_widget)
ratio_hint = QLabel("(Auto CJK: Min 0.6, Max 2.0 | Auto Non-CJK: Min 0.7, Max 1.5)\nValues are normalized by the language multiplier above.")
ratio_hint.setFont(QFont('Arial', 9))
ratio_hint.setStyleSheet("color: gray;")
wordcount_layout.addWidget(ratio_hint)
scroll_layout.addSpacing(15)
# Performance Cache Settings Section
cache_group = QGroupBox("Performance Cache Settings")
cache_group.setFont(QFont('Arial', 12, QFont.Bold))
cache_layout = QVBoxLayout(cache_group)
cache_layout.setContentsMargins(20, 15, 20, 15)
scroll_layout.addWidget(cache_group)
# Enable cache checkbox
cache_enabled_checkbox = self._create_styled_checkbox("Enable performance cache (speeds up duplicate detection)")
cache_enabled_checkbox.setChecked(qa_settings.get('cache_enabled', True))
cache_layout.addWidget(cache_enabled_checkbox)
cache_layout.addSpacing(10)
# Cache size settings
cache_desc_label = QLabel("Cache sizes (0 = disabled, -1 = unlimited):")
cache_desc_label.setFont(QFont('Arial', 10))
cache_layout.addWidget(cache_desc_label)
cache_layout.addSpacing(5)
# Cache size variables - store spinboxes and buttons
cache_spinboxes = {}
cache_buttons = {}
cache_defaults = {
'normalize_text': 10000,
'similarity_ratio': 20000,
'content_hashes': 5000,
'semantic_fingerprint': 2000,
'structural_signature': 2000,
'translation_artifacts': 1000
}
# Create input fields for each cache type
for cache_name, default_value in cache_defaults.items():
row_widget = QWidget()
row_layout = QHBoxLayout(row_widget)
row_layout.setContentsMargins(0, 2, 0, 2)
# Label
label_text = cache_name.replace('_', ' ').title() + ":"
cache_label = QLabel(label_text)
cache_label.setFont(QFont('Arial', 9))
cache_label.setMinimumWidth(200)
row_layout.addWidget(cache_label)
# Get current value
current_value = qa_settings.get(f'cache_{cache_name}', default_value)
# Spinbox
spinbox = QSpinBox()
spinbox.setMinimum(-1)
spinbox.setMaximum(50000)
spinbox.setValue(current_value)
spinbox.setMinimumWidth(100)
disable_wheel_event(spinbox)
row_layout.addWidget(spinbox)
cache_spinboxes[cache_name] = spinbox
# Quick preset buttons
def make_preset_handler(sb, val):
    """Build a zero-argument callback that sets ``sb`` to ``val``.

    Each call captures its own ``sb``/``val`` pair, so the handlers created
    for different preset buttons stay independent of one another.
    """
    def apply_preset():
        # Return the setter's result so the callback behaves exactly like
        # a ``lambda: sb.setValue(val)`` would.
        return sb.setValue(val)

    return apply_preset
off_btn = QPushButton("Off")
off_btn.setFont(QFont('Arial', 8))
off_btn.setMinimumWidth(40)
off_btn.clicked.connect(make_preset_handler(spinbox, 0))
row_layout.addWidget(off_btn)
small_btn = QPushButton("Small")
small_btn.setFont(QFont('Arial', 8))
small_btn.setMinimumWidth(50)
small_btn.clicked.connect(make_preset_handler(spinbox, 1000))
row_layout.addWidget(small_btn)
medium_btn = QPushButton("Medium")
medium_btn.setFont(QFont('Arial', 8))
medium_btn.setMinimumWidth(60)
medium_btn.clicked.connect(make_preset_handler(spinbox, default_value))
row_layout.addWidget(medium_btn)
large_btn = QPushButton("Large")
large_btn.setFont(QFont('Arial', 8))
large_btn.setMinimumWidth(50)
large_btn.clicked.connect(make_preset_handler(spinbox, default_value * 2))
row_layout.addWidget(large_btn)
max_btn = QPushButton("Max")
max_btn.setFont(QFont('Arial', 8))
max_btn.setMinimumWidth(40)
max_btn.clicked.connect(make_preset_handler(spinbox, -1))
row_layout.addWidget(max_btn)
# Store buttons for enabling/disabling
cache_buttons[cache_name] = [cache_label, off_btn, small_btn, medium_btn, large_btn, max_btn]
row_layout.addStretch()
cache_layout.addWidget(row_widget)
# Enable/disable cache size controls based on checkbox
def toggle_cache_controls(checked):
    """Enable or disable every cache-size row to match the cache checkbox.

    For each cache type the spinbox is updated first, then the row's label
    and preset buttons stored in ``cache_buttons``.
    """
    for name in cache_defaults:
        cache_spinboxes[name].setEnabled(checked)
        for row_control in cache_buttons[name]:
            row_control.setEnabled(checked)
cache_enabled_checkbox.toggled.connect(toggle_cache_controls)
toggle_cache_controls(cache_enabled_checkbox.isChecked()) # Set initial state
cache_layout.addSpacing(10)
# Auto-size cache option
auto_size_widget = QWidget()
auto_size_layout = QHBoxLayout(auto_size_widget)
auto_size_layout.setContentsMargins(0, 0, 0, 5)
auto_size_checkbox = self._create_styled_checkbox("Auto-size caches based on available RAM")
auto_size_checkbox.setChecked(qa_settings.get('cache_auto_size', False))
auto_size_layout.addWidget(auto_size_checkbox)
auto_size_hint = QLabel("(overrides manual settings)")
auto_size_hint.setFont(QFont('Arial', 9))
auto_size_hint.setStyleSheet("color: gray;")
auto_size_layout.addWidget(auto_size_hint)
auto_size_layout.addStretch()
cache_layout.addWidget(auto_size_widget)
cache_layout.addSpacing(10)
# Cache statistics display
show_stats_checkbox = self._create_styled_checkbox("Show cache hit/miss statistics after scan")
show_stats_checkbox.setChecked(qa_settings.get('cache_show_stats', False))
cache_layout.addWidget(show_stats_checkbox)
cache_layout.addSpacing(10)
# Info about cache
cache_info = QLabel("Larger cache sizes use more memory but improve performance for:\n" +
"• Large datasets (100+ files)\n" +
"• AI Hunter mode (all file pairs compared)\n" +
"• Repeated scans of the same folder")
cache_info.setFont(QFont('Arial', 9))
cache_info.setStyleSheet("color: gray;")
cache_info.setWordWrap(True)
cache_info.setMaximumWidth(700)
cache_info.setContentsMargins(20, 0, 0, 0)
cache_layout.addWidget(cache_info)
scroll_layout.addSpacing(20)
# AI Hunter Performance Section
ai_hunter_group = QGroupBox("AI Hunter Performance Settings")
ai_hunter_group.setFont(QFont('Arial', 12, QFont.Bold))
ai_hunter_layout = QVBoxLayout(ai_hunter_group)
ai_hunter_layout.setContentsMargins(20, 15, 20, 15)
scroll_layout.addWidget(ai_hunter_group)
# Description
ai_hunter_desc = QLabel("AI Hunter mode performs exhaustive duplicate detection by comparing every file pair.\n" +
"Parallel processing can significantly speed up this process on multi-core systems.")
ai_hunter_desc.setFont(QFont('Arial', 9))
ai_hunter_desc.setStyleSheet("color: gray;")
ai_hunter_desc.setWordWrap(True)
ai_hunter_desc.setMaximumWidth(700)
ai_hunter_layout.addWidget(ai_hunter_desc)
ai_hunter_layout.addSpacing(10)
# Parallel workers setting
workers_widget = QWidget()
workers_layout = QHBoxLayout(workers_widget)
workers_layout.setContentsMargins(0, 0, 0, 10)
workers_label = QLabel("Maximum parallel workers:")
workers_label.setFont(QFont('Arial', 10))
workers_layout.addWidget(workers_label)
# Get current value from AI Hunter config
ai_hunter_config = self.config.get('ai_hunter_config', {})
current_max_workers = ai_hunter_config.get('ai_hunter_max_workers', 1)
ai_hunter_workers_spinbox = QSpinBox()
ai_hunter_workers_spinbox.setMinimum(0)
ai_hunter_workers_spinbox.setMaximum(64)
ai_hunter_workers_spinbox.setValue(current_max_workers)
ai_hunter_workers_spinbox.setMinimumWidth(100)
disable_wheel_event(ai_hunter_workers_spinbox)
workers_layout.addWidget(ai_hunter_workers_spinbox)
# CPU count display
import multiprocessing
cpu_count = multiprocessing.cpu_count()
cpu_hint = QLabel(f"(0 = use all {cpu_count} cores)")
cpu_hint.setFont(QFont('Arial', 9))
cpu_hint.setStyleSheet("color: gray;")
workers_layout.addWidget(cpu_hint)
workers_layout.addStretch()
ai_hunter_layout.addWidget(workers_widget)
# Quick preset buttons
preset_widget = QWidget()
preset_layout = QHBoxLayout(preset_widget)
preset_layout.setContentsMargins(0, 0, 0, 0)
preset_label = QLabel("Quick presets:")
preset_label.setFont(QFont('Arial', 9))
preset_layout.addWidget(preset_label)
preset_layout.addSpacing(10)
all_cores_btn = QPushButton(f"All cores ({cpu_count})")
all_cores_btn.setFont(QFont('Arial', 9))
all_cores_btn.clicked.connect(lambda: ai_hunter_workers_spinbox.setValue(0))
preset_layout.addWidget(all_cores_btn)
half_cores_btn = QPushButton("Half cores")
half_cores_btn.setFont(QFont('Arial', 9))
half_cores_btn.clicked.connect(lambda: ai_hunter_workers_spinbox.setValue(max(1, cpu_count // 2)))
preset_layout.addWidget(half_cores_btn)
four_cores_btn = QPushButton("4 cores")
four_cores_btn.setFont(QFont('Arial', 9))
four_cores_btn.clicked.connect(lambda: ai_hunter_workers_spinbox.setValue(4))
preset_layout.addWidget(four_cores_btn)
eight_cores_btn = QPushButton("8 cores")
eight_cores_btn.setFont(QFont('Arial', 9))
eight_cores_btn.clicked.connect(lambda: ai_hunter_workers_spinbox.setValue(8))
preset_layout.addWidget(eight_cores_btn)
single_thread_btn = QPushButton("Single thread")
single_thread_btn.setFont(QFont('Arial', 9))
single_thread_btn.clicked.connect(lambda: ai_hunter_workers_spinbox.setValue(1))
preset_layout.addWidget(single_thread_btn)
preset_layout.addStretch()
ai_hunter_layout.addWidget(preset_widget)
# Performance tips
tips_text = "Performance Tips:\n" + \
f"• Your system has {cpu_count} CPU cores available\n" + \
"• Using all cores provides maximum speed but may slow other applications\n" + \
"• 4-8 cores usually provides good balance of speed and system responsiveness\n" + \
"• Single thread (1) disables parallel processing for debugging"
tips_label = QLabel(tips_text)
tips_label.setFont(QFont('Arial', 9))
tips_label.setStyleSheet("color: gray;")
tips_label.setWordWrap(True)
tips_label.setMaximumWidth(700)
tips_label.setContentsMargins(20, 10, 0, 0)
ai_hunter_layout.addWidget(tips_label)
def save_settings():
    """Persist every value in the QA Scanner settings dialog, then close it.

    Steps, in order:
      1. Copy widget values into ``qa_settings`` (core options, cache flags,
         per-cache sizes, word count multipliers, paragraph threshold).
      2. Store the worker count in ``self.config['ai_hunter_config']``.
      3. Store ``qa_settings`` under ``self.config['qa_scanner_settings']``.
      4. Forward the target language to ``self.update_target_language`` when
         that method exists.
      5. Mirror a subset of values into ``QA_*`` / ``AI_HUNTER_MAX_WORKERS``
         environment variables.
      6. Call ``self.save_config(show_message=False)`` and accept the dialog.

    A failure while reading a single widget does not abort the save; it is
    collected and reported in one warning line (plus detailed ``[DEBUG]``
    lines when ``show_debug_buttons`` is enabled). A failure of
    ``save_config`` or any other unexpected error is logged and shown in a
    critical message box, and the dialog stays open.
    """
    try:
        # Check if debug mode is enabled
        debug_mode = self.config.get('show_debug_buttons', False)

        def log_debug(message):
            """Write ``message`` to the log only when debug mode is on."""
            if debug_mode:
                self.append_log(message)

        log_debug("🔍 [DEBUG] Starting QA Scanner settings save process...")

        def get_selected_radio_value(radio_button_list):
            """Return the value paired with the checked radio button, or None."""
            for rb, value in radio_button_list:
                if rb.isChecked():
                    return value
            return None

        def apply_widget_settings(settings_map):
            """Copy each ``(widget, converter)`` entry into ``qa_settings``.

            Returns a list of ``"name (error)"`` strings for entries whose
            converter raised; those entries leave ``qa_settings`` untouched.
            """
            failures = []
            for setting_name, (var_obj, converter) in settings_map.items():
                try:
                    old_value = qa_settings.get(setting_name, '<NOT SET>')
                    new_value = converter(var_obj)
                    qa_settings[setting_name] = new_value
                    if debug_mode:
                        if old_value != new_value:
                            self.append_log(f"🔍 [DEBUG] QA {setting_name}: '{old_value}' → '{new_value}'")
                        else:
                            self.append_log(f"🔍 [DEBUG] QA {setting_name}: unchanged ('{new_value}')")
                except Exception as e:
                    failures.append(f"{setting_name} ({str(e)})")
                    log_debug(f"❌ [DEBUG] Failed to save QA {setting_name}: {e}")
            return failures

        # Everything that could not be saved; reported once near the end so
        # the user sees it even when debug mode is off.
        save_failures = []

        # Core QA Settings with debugging
        core_settings_to_save = {
            'foreign_char_threshold': (threshold_spinbox, lambda x: x.value()),
            'excluded_characters': (excluded_text, lambda x: x.toPlainText().strip()),
            'source_language': (source_lang_combo, lambda x: _normalize_source_language(x.currentText())),
            'target_language': (target_language_combo, lambda x: _normalize_target_language(x.currentText())),
            'check_encoding_issues': (check_encoding_checkbox, lambda x: x.isChecked()),
            'check_repetition': (check_repetition_checkbox, lambda x: x.isChecked()),
            'check_translation_artifacts': (check_artifacts_checkbox, lambda x: x.isChecked()),
            'check_ai_artifacts': (check_ai_artifacts_checkbox, lambda x: x.isChecked()),
            'check_punctuation_mismatch': (check_punctuation_checkbox, lambda x: x.isChecked()),
            'punctuation_loss_threshold': (punct_threshold_spinbox, lambda x: x.value()),
            'flag_excess_punctuation': (excess_punct_checkbox, lambda x: x.isChecked()),
            'excess_punctuation_threshold': (excess_threshold_spinbox, lambda x: x.value()),
            'check_glossary_leakage': (check_glossary_checkbox, lambda x: x.isChecked()),
            'check_missing_images': (check_missing_images_checkbox, lambda x: x.isChecked()),
            'min_file_length': (min_length_spinbox, lambda x: x.value()),
            'min_duplicate_word_count': (min_dup_words_spinbox, lambda x: x.value()),
            'min_text_length_for_spacing': (min_spacing_text_spinbox, lambda x: x.value()),
            # No radio button is checked when the stored format matches none
            # of the offered options; fall back to 'detailed' instead of
            # persisting None (which would export QA_REPORT_FORMAT="None").
            'report_format': (format_radio_buttons, lambda x: get_selected_radio_value(x) or 'detailed'),
            'auto_save_report': (auto_save_checkbox, lambda x: x.isChecked()),
            'check_word_count_ratio': (check_word_count_checkbox, lambda x: x.isChecked()),
            'counting_mode': (counting_mode_combo, lambda x: x.currentData()),
            'check_multiple_headers': (check_multiple_headers_checkbox, lambda x: x.isChecked()),
            'warn_name_mismatch': (warn_mismatch_checkbox, lambda x: x.isChecked()),
            'check_missing_html_tag': (check_missing_html_tag_checkbox, lambda x: x.isChecked()),
            'check_body_tag': (check_body_tag_checkbox, lambda x: x.isChecked()),
            'check_missing_header_tags': (check_missing_header_tags_checkbox, lambda x: x.isChecked()),
            'check_paragraph_structure': (check_paragraph_structure_checkbox, lambda x: x.isChecked()),
            'check_invalid_nesting': (check_invalid_nesting_checkbox, lambda x: x.isChecked()),
            'word_count_min_ratio': (ratio_min_spin, lambda x: x.currentText()),
            'word_count_max_ratio': (ratio_max_spin, lambda x: x.currentText()),
        }
        failed_core_settings = apply_widget_settings(core_settings_to_save)
        if failed_core_settings:
            log_debug(f"⚠️ [DEBUG] Failed QA core settings: {', '.join(failed_core_settings)}")
        save_failures.extend(failed_core_settings)

        # Cache settings with debugging
        log_debug("🔍 [DEBUG] Saving QA cache settings...")
        cache_settings_to_save = {
            'cache_enabled': (cache_enabled_checkbox, lambda x: x.isChecked()),
            'cache_auto_size': (auto_size_checkbox, lambda x: x.isChecked()),
            'cache_show_stats': (show_stats_checkbox, lambda x: x.isChecked()),
        }
        save_failures.extend(apply_widget_settings(cache_settings_to_save))

        # Save individual cache sizes with debugging
        saved_cache_vars = []
        failed_cache_vars = []
        for cache_name, cache_spinbox in cache_spinboxes.items():
            try:
                cache_key = f'cache_{cache_name}'
                old_value = qa_settings.get(cache_key, '<NOT SET>')
                new_value = cache_spinbox.value()
                qa_settings[cache_key] = new_value
                saved_cache_vars.append(cache_name)
                if debug_mode and old_value != new_value:
                    self.append_log(f"🔍 [DEBUG] QA {cache_key}: '{old_value}' → '{new_value}'")
            except Exception as e:
                failed_cache_vars.append(f"{cache_name} ({str(e)})")
                log_debug(f"❌ [DEBUG] Failed to save QA cache_{cache_name}: {e}")
        if saved_cache_vars:
            log_debug(f"🔍 [DEBUG] Saved {len(saved_cache_vars)} cache settings: {', '.join(saved_cache_vars)}")
        if failed_cache_vars:
            log_debug(f"⚠️ [DEBUG] Failed cache settings: {', '.join(failed_cache_vars)}")
        save_failures.extend(failed_cache_vars)

        # Save word count multipliers
        try:
            # Save auto toggle state
            use_auto = auto_multipliers_checkbox.isChecked()
            qa_settings['use_auto_multipliers'] = use_auto
            # If auto is enabled, use default values; otherwise use slider values
            if use_auto:
                wc_mults = dict(default_wordcount_defaults)
                log_debug("🔍 [DEBUG] Using default word count multipliers (auto mode)")
            else:
                wc_mults = {}
                for lang_key, widget in word_multiplier_sliders.items():
                    if lang_key.endswith('__spin'):
                        # "<lang>__spin" entries are read in preference to
                        # the plain "<lang>" entry for the same language.
                        base_key = lang_key[:-6]
                        wc_mults[base_key] = widget.value() / 100.0
                    elif f"{lang_key}__spin" not in word_multiplier_sliders:
                        wc_mults[lang_key] = widget.value() / 100.0
                log_debug("🔍 [DEBUG] Using custom word count multipliers (manual mode)")
            qa_settings['word_count_multipliers'] = wc_mults
        except Exception as e:
            save_failures.append(f"word_count_multipliers ({str(e)})")
            log_debug(f"❌ [DEBUG] Failed to save word count multipliers: {e}")

        # AI Hunter config with debugging
        log_debug("🔍 [DEBUG] Saving AI Hunter config...")
        try:
            if 'ai_hunter_config' not in self.config:
                self.config['ai_hunter_config'] = {}
                log_debug("🔍 [DEBUG] Created new ai_hunter_config section")
            old_workers = self.config['ai_hunter_config'].get('ai_hunter_max_workers', '<NOT SET>')
            new_workers = ai_hunter_workers_spinbox.value()
            self.config['ai_hunter_config']['ai_hunter_max_workers'] = new_workers
            if debug_mode:
                if old_workers != new_workers:
                    self.append_log(f"🔍 [DEBUG] AI Hunter max_workers: '{old_workers}' → '{new_workers}'")
                else:
                    self.append_log(f"🔍 [DEBUG] AI Hunter max_workers: unchanged ('{new_workers}')")
        except Exception as e:
            save_failures.append(f"ai_hunter_max_workers ({str(e)})")
            log_debug(f"❌ [DEBUG] Failed to save AI Hunter config: {e}")

        # Validate and save paragraph threshold with debugging
        log_debug("🔍 [DEBUG] Validating paragraph threshold...")
        try:
            threshold_value = paragraph_threshold_spinbox.value()
            old_threshold = qa_settings.get('paragraph_threshold', '<NOT SET>')
            if not 0 <= threshold_value <= 100:
                raise ValueError("Threshold must be between 0 and 100")
            new_threshold = threshold_value / 100.0  # Convert to decimal
            qa_settings['paragraph_threshold'] = new_threshold
            if debug_mode:
                if old_threshold != new_threshold:
                    self.append_log(f"🔍 [DEBUG] QA paragraph_threshold: '{old_threshold}' → '{new_threshold}' ({threshold_value}%)")
                else:
                    self.append_log(f"🔍 [DEBUG] QA paragraph_threshold: unchanged ('{new_threshold}' / {threshold_value}%)")
        except Exception as e:
            # Default to 30% if invalid
            qa_settings['paragraph_threshold'] = 0.3
            log_debug(f"❌ [DEBUG] Invalid paragraph threshold ({e}), using default 30%")
            self.append_log("⚠️ Invalid paragraph threshold, using default 30%")

        # Save to main config with debugging
        log_debug("🔍 [DEBUG] Saving QA settings to main config...")
        try:
            old_qa_config = self.config.get('qa_scanner_settings', {})
            self.config['qa_scanner_settings'] = qa_settings
            if debug_mode:
                # Count changed settings.
                # NOTE(review): if qa_settings is the very dict object that is
                # already stored in self.config, this comparison always
                # reports "no changes" — confirm where qa_settings comes from.
                changed_settings = [
                    key for key, new_value in qa_settings.items()
                    if old_qa_config.get(key) != new_value
                ]
                if changed_settings:
                    self.append_log(f"🔍 [DEBUG] Changed {len(changed_settings)} QA settings: {', '.join(changed_settings[:5])}{'...' if len(changed_settings) > 5 else ''}")
                else:
                    self.append_log("🔍 [DEBUG] No QA settings changed")
        except Exception as e:
            log_debug(f"❌ [DEBUG] Failed to update main config: {e}")

        # Sync target language with the main translation UI so all
        # dropdowns stay in sync.
        try:
            display_lang = target_language_combo.currentText().strip()
            if display_lang and hasattr(self, 'update_target_language'):
                self.update_target_language(display_lang)
        except Exception as e:
            log_debug(f"⚠️ [DEBUG] Failed to sync target language with main UI: {e}")

        # Environment variables setup for QA Scanner
        log_debug("🔍 [DEBUG] Setting QA Scanner environment variables...")
        qa_env_vars_set = []
        try:
            # QA Scanner environment variables
            qa_env_mappings = [
                ('QA_FOREIGN_CHAR_THRESHOLD', str(qa_settings.get('foreign_char_threshold', 10))),
                ('QA_TARGET_LANGUAGE', qa_settings.get('target_language', 'english')),
                ('QA_CHECK_ENCODING', '1' if qa_settings.get('check_encoding_issues', False) else '0'),
                ('QA_CHECK_REPETITION', '1' if qa_settings.get('check_repetition', True) else '0'),
                ('QA_CHECK_ARTIFACTS', '1' if qa_settings.get('check_translation_artifacts', False) else '0'),
                ('QA_CHECK_AI_ARTIFACTS', '1' if qa_settings.get('check_ai_artifacts', False) else '0'),
                ('QA_CHECK_GLOSSARY_LEAKAGE', '1' if qa_settings.get('check_glossary_leakage', True) else '0'),
                ('QA_CHECK_MISSING_IMAGES', '1' if qa_settings.get('check_missing_images', True) else '0'),
                ('QA_MIN_FILE_LENGTH', str(qa_settings.get('min_file_length', 0))),
                ('QA_REPORT_FORMAT', qa_settings.get('report_format', 'detailed')),
                ('QA_AUTO_SAVE_REPORT', '1' if qa_settings.get('auto_save_report', True) else '0'),
                ('QA_CACHE_ENABLED', '1' if qa_settings.get('cache_enabled', True) else '0'),
                ('QA_PARAGRAPH_THRESHOLD', str(qa_settings.get('paragraph_threshold', 0.3))),
                ('AI_HUNTER_MAX_WORKERS', str(self.config.get('ai_hunter_config', {}).get('ai_hunter_max_workers', 1))),
                # Counting mode: set env vars based on selection
                ('QA_USE_WORD_COUNT', '1' if qa_settings.get('counting_mode') == 'word' else '0'),
                ('QA_EXACT_CHAR_COUNT', '1' if qa_settings.get('counting_mode') == 'exact' else '0'),
            ]
            for env_key, env_value in qa_env_mappings:
                try:
                    old_value = os.environ.get(env_key, '<NOT SET>')
                    os.environ[env_key] = str(env_value)
                    new_value = os.environ[env_key]
                    qa_env_vars_set.append(env_key)
                    if debug_mode:
                        if old_value != new_value:
                            self.append_log(f"🔍 [DEBUG] ENV {env_key}: '{old_value}' → '{new_value}'")
                        else:
                            self.append_log(f"🔍 [DEBUG] ENV {env_key}: unchanged ('{new_value}')")
                except Exception as e:
                    log_debug(f"❌ [DEBUG] Failed to set {env_key}: {e}")
            log_debug(f"🔍 [DEBUG] Successfully set {len(qa_env_vars_set)} QA environment variables")
        except Exception as e:
            if debug_mode:
                self.append_log(f"❌ [DEBUG] QA environment variable setup failed: {e}")
                self.append_log(f"❌ [DEBUG] Traceback: {traceback.format_exc()}")

        # Call save_config with show_message=False to avoid the error
        log_debug("🔍 [DEBUG] Calling main save_config method...")
        try:
            self.save_config(show_message=False)
            log_debug("🔍 [DEBUG] Main save_config completed successfully")
        except Exception as e:
            log_debug(f"❌ [DEBUG] Main save_config failed: {e}")
            # Re-raise so the outer handler reports the failure to the user
            # and the dialog stays open.
            raise

        # Final QA environment variable verification
        if debug_mode:
            self.append_log("🔍 [DEBUG] Final QA environment variable check:")
            critical_qa_vars = ['QA_FOREIGN_CHAR_THRESHOLD', 'QA_TARGET_LANGUAGE', 'QA_REPORT_FORMAT', 'AI_HUNTER_MAX_WORKERS']
            for var in critical_qa_vars:
                value = os.environ.get(var, '<NOT SET>')
                if value == '<NOT SET>' or not value:
                    self.append_log(f"❌ [DEBUG] CRITICAL QA: {var} is not set or empty!")
                else:
                    self.append_log(f"✅ [DEBUG] QA {var}: {value}")

        # Surface partial failures even when debug mode is off; previously
        # they were only visible in the [DEBUG] log lines.
        if save_failures:
            self.append_log(f"⚠️ Some QA settings could not be saved: {', '.join(save_failures)}")

        self.append_log("✅ QA Scanner settings saved successfully")
        dialog._cleanup_scrolling()  # Clean up scrolling bindings
        dialog.accept()
    except Exception as e:
        # Get debug_mode again in case of early exception
        debug_mode = self.config.get('show_debug_buttons', False)
        if debug_mode:
            self.append_log(f"❌ [DEBUG] QA save_settings full exception: {str(e)}")
            self.append_log(f"❌ [DEBUG] QA save_settings traceback: {traceback.format_exc()}")
        self.append_log(f"❌ Error saving QA settings: {str(e)}")
        QMessageBox.critical(dialog, "Error", f"Failed to save settings: {str(e)}")
def reset_defaults():
    """Ask for confirmation, then set the dialog's widgets to default values.

    Only widget state changes here; nothing is written to the configuration
    until the user presses "Save Settings". The excluded-characters text is
    captured before the reset and written back afterwards, as promised by
    the confirmation prompt.
    """
    result = QMessageBox.question(
        dialog,
        "Reset to Defaults",
        "Are you sure you want to reset all settings to defaults?\n\n(Your excluded characters list will be preserved)",
        QMessageBox.Yes | QMessageBox.No
    )
    if result != QMessageBox.Yes:
        return

    # Save current excluded characters before reset
    saved_excluded_chars = excluded_text.toPlainText()

    # Foreign character / language defaults
    threshold_spinbox.setValue(10)
    source_lang_combo.setCurrentText('Auto')
    target_language_combo.setCurrentText('English')

    # Detection defaults
    check_encoding_checkbox.setChecked(False)
    check_repetition_checkbox.setChecked(True)
    check_artifacts_checkbox.setChecked(False)
    check_ai_artifacts_checkbox.setChecked(False)
    check_punctuation_checkbox.setChecked(False)
    punct_threshold_spinbox.setValue(49)
    excess_punct_checkbox.setChecked(False)
    excess_threshold_spinbox.setValue(49)

    # Word count analysis defaults
    check_word_count_checkbox.setChecked(True)
    try:
        idx = counting_mode_combo.findData('exact')
        # Fall back to index 1 when no item carries the 'exact' data value.
        counting_mode_combo.setCurrentIndex(idx if idx >= 0 else 1)
    except Exception:
        # Best effort: leave the counting mode unchanged if the combo misbehaves.
        pass
    ratio_min_spin.setCurrentText('Auto')
    ratio_max_spin.setCurrentText('Auto')

    # Reset auto multipliers checkbox to default (enabled)
    auto_multipliers_checkbox.setChecked(True)

    # Reset word count multipliers to defaults. Widgets hold percentages,
    # so convert with round(): int() truncates float products such as
    # 1.15 * 100 == 114.99999999999999 down to 114.
    for lang_key, widget in word_multiplier_sliders.items():
        if lang_key.endswith('__spin'):
            base_key = lang_key[:-6]
            default_val = default_wordcount_defaults.get(base_key, 1.0)
            widget.setValue(round(default_val * 100))
        elif f"{lang_key}__spin" not in word_multiplier_sliders:
            default_val = default_wordcount_defaults.get(lang_key, 1.0)
            widget.setValue(round(default_val * 100))

    check_glossary_checkbox.setChecked(True)
    check_missing_images_checkbox.setChecked(True)
    min_length_spinbox.setValue(0)

    # Set 'detailed' radio button as checked
    for rb, value in format_radio_buttons:
        rb.setChecked(value == 'detailed')
    auto_save_checkbox.setChecked(True)

    # HTML structure checks
    check_multiple_headers_checkbox.setChecked(True)
    warn_mismatch_checkbox.setChecked(True)
    check_missing_html_tag_checkbox.setChecked(True)
    check_missing_header_tags_checkbox.setChecked(True)
    check_paragraph_structure_checkbox.setChecked(True)
    check_invalid_nesting_checkbox.setChecked(False)
    paragraph_threshold_spinbox.setValue(30)  # 30% default
    # NOTE(review): check_body_tag_checkbox, min_dup_words_spinbox and
    # min_spacing_text_spinbox are written by save_settings but are not
    # reset here — confirm their defaults and add them if that is intended.

    # Reset cache settings
    cache_enabled_checkbox.setChecked(True)
    auto_size_checkbox.setChecked(False)
    show_stats_checkbox.setChecked(False)

    # Reset cache sizes to defaults
    for cache_name, default_value in cache_defaults.items():
        cache_spinboxes[cache_name].setValue(default_value)
    ai_hunter_workers_spinbox.setValue(1)

    # Restore excluded characters (per confirmation text)
    excluded_text.setPlainText(saved_excluded_chars)
scroll_layout.addStretch()
# Create fixed bottom button section (outside scroll area)
button_widget = QWidget()
button_layout = QHBoxLayout(button_widget)
button_layout.setContentsMargins(20, 15, 20, 15)
save_btn = QPushButton("Save Settings")
save_btn.setMinimumWidth(120)
save_btn.setStyleSheet("background-color: #28a745; color: white; padding: 8px; font-weight: bold;")
save_btn.clicked.connect(save_settings)
button_layout.addWidget(save_btn)
cancel_btn = QPushButton("Cancel")
cancel_btn.setMinimumWidth(120)
cancel_btn.setStyleSheet("background-color: #6c757d; color: white; padding: 8px;")
cancel_btn.clicked.connect(lambda: [dialog._cleanup_scrolling(), dialog.reject()])
button_layout.addWidget(cancel_btn)
reset_btn = QPushButton("Reset to Default")
reset_btn.setMinimumWidth(120)
reset_btn.setStyleSheet("background-color: #ffc107; color: black; padding: 8px;")
reset_btn.clicked.connect(reset_defaults)
button_layout.addWidget(reset_btn)
# Add button widget to main layout (not scroll layout)
main_layout.addWidget(button_widget)
# Show the dialog (PySide6 handles sizing automatically)
# Note: The dialog size is already set in the constructor (800x600)
# Add a dummy _cleanup_scrolling method for compatibility
dialog._cleanup_scrolling = lambda: None
# Handle window close - just cleanup, don't call reject() to avoid recursion
def handle_close():
    """Run the dialog's scrolling cleanup hook.

    Connected to ``dialog.rejected``; it deliberately does not call
    ``reject()`` itself, to avoid recursion.
    """
    cleanup_hook = dialog._cleanup_scrolling
    cleanup_hook()
dialog.rejected.connect(handle_close)
# Show the dialog with fade animation and return result
try:
from dialog_animations import exec_dialog_with_fade
return exec_dialog_with_fade(dialog, duration=250)
except Exception:
return dialog.exec()
def show_custom_detection_dialog(parent=None):
    """
    Standalone function to show the custom detection settings dialog.

    Builds a modal dialog with one slider per detection threshold and a set
    of processing options, runs it, and reports what the user chose.
    This function can be called from anywhere, including scan_html_folder.py

    Args:
        parent: Optional parent widget passed to the QDialog constructor.

    Returns:
        A dictionary with the keys 'text_similarity', 'semantic_analysis',
        'structural_patterns', 'word_overlap', 'minhash_similarity',
        'consecutive_chapters', 'check_all_pairs', 'sample_size',
        'min_text_length' and 'min_duplicate_word_count' if the user pressed
        "Start Scan"; None if the dialog was cancelled or closed.
    """
    from PySide6.QtWidgets import (QApplication, QDialog, QWidget, QLabel, QPushButton,
                                   QVBoxLayout, QHBoxLayout, QScrollArea, QGroupBox,
                                   QCheckBox, QSpinBox, QSlider, QMessageBox, QSizePolicy)
    from PySide6.QtCore import Qt
    from PySide6.QtGui import QFont, QIcon
    import os

    # Create dialog
    custom_dialog = QDialog(parent)
    custom_dialog.setWindowTitle("Custom Mode Settings")
    custom_dialog.setModal(True)

    # Set dialog size relative to the primary screen. primaryScreen() can
    # return None when no screen is attached, so only resize when it exists.
    primary_screen = QApplication.primaryScreen()
    if primary_screen is not None:
        screen = primary_screen.geometry()
        custom_width = int(screen.width() * 0.51)
        custom_height = int(screen.height() * 0.60)
        custom_dialog.resize(custom_width, custom_height)

    # Set window icon (best effort: a missing icon must never block the dialog)
    try:
        # Try to find the icon in common locations
        possible_paths = [
            os.path.join(os.path.dirname(os.path.abspath(__file__)), 'Halgakos.ico'),
            os.path.join(os.getcwd(), 'Halgakos.ico'),
        ]
        for ico_path in possible_paths:
            if os.path.isfile(ico_path):
                custom_dialog.setWindowIcon(QIcon(ico_path))
                break
    except Exception:
        pass

    # Main layout
    dialog_layout = QVBoxLayout(custom_dialog)

    # Scroll area
    scroll = QScrollArea()
    scroll.setWidgetResizable(True)
    scroll.setHorizontalScrollBarPolicy(Qt.ScrollBarAsNeeded)
    scroll.setVerticalScrollBarPolicy(Qt.ScrollBarAsNeeded)

    # Scrollable content widget
    scroll_widget = QWidget()
    scroll_layout = QVBoxLayout(scroll_widget)
    scroll.setWidget(scroll_widget)
    dialog_layout.addWidget(scroll)

    # Default settings: the single source of truth for initial values,
    # for "Reset Defaults", and for the set of keys returned to the caller.
    default_settings = {
        'text_similarity': 85,
        'semantic_analysis': 80,
        'structural_patterns': 90,
        'word_overlap': 75,
        'minhash_similarity': 80,
        'consecutive_chapters': 2,
        'check_all_pairs': False,
        'sample_size': 3000,
        'min_text_length': 500,
        'min_duplicate_word_count': 500
    }

    # Working copy of the settings shown in the dialog. The widgets below are
    # initialised from it and the sliders write their live value back into it.
    # It is seeded from the defaults so the name is always bound locally.
    custom_settings = dict(default_settings)

    # Store widget references
    custom_widgets = {}

    # Title
    title_label = QLabel("Configure Custom Detection Settings")
    title_label.setFont(QFont('Arial', 20, QFont.Bold))
    title_label.setAlignment(Qt.AlignCenter)
    scroll_layout.addWidget(title_label)
    scroll_layout.addSpacing(20)

    # Detection Thresholds Section
    threshold_group = QGroupBox("Detection Thresholds (%)")
    threshold_group.setFont(QFont('Arial', 12, QFont.Bold))
    threshold_layout = QVBoxLayout(threshold_group)
    threshold_layout.setContentsMargins(25, 25, 25, 25)
    scroll_layout.addWidget(threshold_group)

    threshold_descriptions = {
        'text_similarity': ('Text Similarity', 'Character-by-character comparison'),
        'semantic_analysis': ('Semantic Analysis', 'Meaning and context matching'),
        'structural_patterns': ('Structural Patterns', 'Document structure similarity'),
        'word_overlap': ('Word Overlap', 'Common words between texts'),
        'minhash_similarity': ('MinHash Similarity', 'Fast approximate matching')
    }

    # Create percentage labels dictionary
    percentage_labels = {}

    def create_update_function(key, label, settings_dict):
        """Return a slot that mirrors a slider value into settings_dict and its label."""
        def update_percentage(value):
            settings_dict[key] = value
            label.setText(f"{value}%")
        return update_percentage

    for setting_key, (label_text, description) in threshold_descriptions.items():
        # Container for each threshold
        row_widget = QWidget()
        row_layout = QHBoxLayout(row_widget)
        row_layout.setContentsMargins(0, 8, 0, 8)

        # Left side - labels
        label_widget = QWidget()
        label_layout = QVBoxLayout(label_widget)
        label_layout.setContentsMargins(0, 0, 0, 0)
        main_label = QLabel(f"{label_text} - {description}:")
        main_label.setFont(QFont('Arial', 11))
        label_layout.addWidget(main_label)
        label_widget.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Preferred)
        row_layout.addWidget(label_widget)

        # Right side - slider and percentage
        slider_widget = QWidget()
        slider_layout = QHBoxLayout(slider_widget)
        slider_layout.setContentsMargins(20, 0, 0, 0)

        # Create slider
        slider = QSlider(Qt.Horizontal)
        slider.setMinimum(10)
        slider.setMaximum(100)
        slider.setValue(custom_settings[setting_key])
        slider.setMinimumWidth(300)
        # Ignore the mouse wheel so scrolling the page does not change values
        slider.wheelEvent = lambda event: event.ignore()
        slider_layout.addWidget(slider)

        # Percentage label
        percentage_label = QLabel(f"{custom_settings[setting_key]}%")
        percentage_label.setFont(QFont('Arial', 12, QFont.Bold))
        percentage_label.setMinimumWidth(50)
        percentage_label.setAlignment(Qt.AlignRight)
        slider_layout.addWidget(percentage_label)
        percentage_labels[setting_key] = percentage_label

        row_layout.addWidget(slider_widget)
        threshold_layout.addWidget(row_widget)

        # Store slider widget reference
        custom_widgets[setting_key] = slider

        # Update percentage label when slider moves. The factory binds the
        # current key/label per iteration, avoiding late-binding closures.
        slider.valueChanged.connect(
            create_update_function(setting_key, percentage_label, custom_settings)
        )

    scroll_layout.addSpacing(15)

    # Processing Options Section
    options_group = QGroupBox("Processing Options")
    options_group.setFont(QFont('Arial', 12, QFont.Bold))
    options_layout = QVBoxLayout(options_group)
    options_layout.setContentsMargins(20, 20, 20, 20)
    scroll_layout.addWidget(options_group)

    def add_spinbox_row(key, caption, minimum, maximum, step=None, tooltip=None):
        """Add one "label + spin box" row to the options group and register it under key."""
        row = QWidget()
        row_box = QHBoxLayout(row)
        row_box.setContentsMargins(0, 5, 0, 5)
        caption_label = QLabel(caption)
        caption_label.setFont(QFont('Arial', 11))
        row_box.addWidget(caption_label)

        spinbox = QSpinBox()
        spinbox.setMinimum(minimum)
        spinbox.setMaximum(maximum)
        if step is not None:
            spinbox.setSingleStep(step)
        # The range must be set before the value, or the value would be clamped
        spinbox.setValue(custom_settings[key])
        spinbox.setMinimumWidth(100)
        if tooltip is not None:
            spinbox.setToolTip(tooltip)
        # Ignore the mouse wheel so scrolling the page does not change values
        spinbox.wheelEvent = lambda event: event.ignore()
        row_box.addWidget(spinbox)
        row_box.addStretch()

        options_layout.addWidget(row)
        custom_widgets[key] = spinbox

    # Consecutive chapters option
    add_spinbox_row('consecutive_chapters', "Consecutive chapters to check:", 1, 10)

    # Sample size option
    # QSpinBox requires a maximum; set it extremely high to be effectively "no maximum"
    add_spinbox_row(
        'sample_size',
        "Sample size for comparison (characters):",
        -1,
        2000000000,
        step=500,
        tooltip="-1 = use all characters, 0 = disable duplicate detection",
    )

    # Minimum text length option
    add_spinbox_row(
        'min_text_length',
        "Minimum text length to process (characters):",
        100,
        5000,
        step=100,
    )

    # Minimum word count for duplicate detection
    add_spinbox_row(
        'min_duplicate_word_count',
        "Minimum words to flag as duplicate (skip small files like sections/notices):",
        100,
        2000,
        step=50,
    )

    # Check all file pairs option
    check_all_checkbox = QCheckBox("Check all file pairs (slower but more thorough)")
    check_all_checkbox.setChecked(custom_settings['check_all_pairs'])
    options_layout.addWidget(check_all_checkbox)
    custom_widgets['check_all_pairs'] = check_all_checkbox

    scroll_layout.addSpacing(30)

    # Button layout (lives inside the scrollable content, centred by stretches)
    button_widget = QWidget()
    button_layout = QHBoxLayout(button_widget)
    button_layout.addStretch()
    scroll_layout.addWidget(button_widget)

    # Flag to track if settings were confirmed
    settings_confirmed = False
    result_settings = None

    def confirm_settings():
        """Confirm settings and close dialog"""
        nonlocal settings_confirmed, result_settings
        # Read every widget back, keyed and ordered like default_settings
        collected = {}
        for key in default_settings:
            widget = custom_widgets[key]
            if isinstance(widget, QCheckBox):
                collected[key] = widget.isChecked()
            else:
                collected[key] = widget.value()
        result_settings = collected
        settings_confirmed = True
        custom_dialog.accept()

    def reset_to_defaults():
        """Reset all values to defaults"""
        reply = QMessageBox.question(custom_dialog, "Reset to Defaults",
                                     "Reset all values to default settings?",
                                     QMessageBox.Yes | QMessageBox.No)
        if reply != QMessageBox.Yes:
            return
        # Drive the reset from default_settings so the two can never drift apart
        for key, default_value in default_settings.items():
            widget = custom_widgets[key]
            if isinstance(widget, QCheckBox):
                widget.setChecked(default_value)
            else:
                widget.setValue(default_value)

    # Create buttons
    cancel_btn = QPushButton("Cancel")
    cancel_btn.setMinimumWidth(120)
    cancel_btn.clicked.connect(custom_dialog.reject)
    button_layout.addWidget(cancel_btn)

    reset_btn = QPushButton("Reset Defaults")
    reset_btn.setMinimumWidth(120)
    reset_btn.clicked.connect(reset_to_defaults)
    button_layout.addWidget(reset_btn)

    start_btn = QPushButton("Start Scan")
    start_btn.setMinimumWidth(120)
    start_btn.setStyleSheet("""
QPushButton {
background-color: #28a745;
color: white;
border: 1px solid #28a745;
padding: 6px 12px;
border-radius: 4px;
}
QPushButton:hover {
background-color: #218838;
}
""")
    start_btn.clicked.connect(confirm_settings)
    button_layout.addWidget(start_btn)
    button_layout.addStretch()

    # Show dialog with fade animation; fall back to a plain modal exec when
    # the optional animation helper is unavailable or fails.
    try:
        from dialog_animations import exec_dialog_with_fade
        exec_dialog_with_fade(custom_dialog, duration=250)
    except Exception:
        custom_dialog.exec()

    # Return settings if confirmed, None otherwise
    return result_settings if settings_confirmed else None