import React, { useState, useEffect, useRef } from 'react';
import { useNavigate, Link, useLocation } from 'react-router-dom';
import toast from 'react-hot-toast';
import {
ArrowLeft, ArrowRight, FileText, Upload, Play, Download,
CheckCircle, AlertCircle, Loader2, Settings, Eye, Edit3,
Save, Trash2, RefreshCw, FileUp, X, ChevronDown, ChevronUp,
Hash, Type, List, BookOpen, Quote, Table, Image, Code
} from 'lucide-react';
import { wordFormatterAPI } from '../api';
/**
 * Display metadata for each paragraph type, keyed by the type identifier.
 *
 * Every entry has the shape `{ label, icon, color }`: a display label, an
 * icon component imported from lucide-react, and a CSS class string.
 * `body` doubles as the fallback entry for unknown types.
 */
const PARAGRAPH_TYPES = Object.fromEntries(
  [
    // [type key, label, icon, color classes]
    ['title', '标题', Type, 'bg-blue-100 text-blue-700 border-blue-300'],
    ['heading1', '一级标题', Hash, 'bg-blue-100 text-blue-700 border-blue-300'],
    ['heading2', '二级标题', Hash, 'bg-cyan-100 text-cyan-700 border-cyan-300'],
    ['heading3', '三级标题', Hash, 'bg-teal-100 text-teal-700 border-teal-300'],
    ['abstract', '摘要', BookOpen, 'bg-amber-100 text-amber-700 border-amber-300'],
    ['keywords', '关键词', List, 'bg-orange-100 text-orange-700 border-orange-300'],
    ['body', '正文', FileText, 'bg-gray-100 text-gray-700 border-gray-300'],
    ['quote', '引用', Quote, 'bg-blue-100 text-blue-700 border-blue-300'],
    ['list_item', '列表项', List, 'bg-green-100 text-green-700 border-green-300'],
    ['table', '表格', Table, 'bg-pink-100 text-pink-700 border-pink-300'],
    ['figure', '图片', Image, 'bg-rose-100 text-rose-700 border-rose-300'],
    ['code', '代码', Code, 'bg-slate-100 text-slate-700 border-slate-300'],
    ['reference', '参考文献', BookOpen, 'bg-teal-100 text-teal-700 border-teal-300'],
  ].map(([key, label, icon, color]) => [key, { label, icon, color }]),
);
/**
 * Article preprocessing page.
 *
 * Takes an uploaded file or pasted text, submits it to the preprocess API,
 * follows the job over a server-sent-events stream, and then lets the user
 * review paragraph types, export the marked text, or continue to the
 * Word formatter route with the result.
 */
const ArticlePreprocessorPage = () => {
const navigate = useNavigate();
const location = useLocation();
// Ref whose `current.value` is cleared on reset (presumably the file <input>).
const fileInputRef = useRef(null);
// Holds the active EventSource so it can be closed on restart, reset and unmount.
const eventSourceRef = useRef(null);
// Input mode and content
const [inputMode, setInputMode] = useState('file'); // 'file' or 'text'
const [text, setText] = useState('');
const [file, setFile] = useState(null);
const [dragActive, setDragActive] = useState(false);
// Configuration: chunking options forwarded to the preprocess API calls
const [showConfig, setShowConfig] = useState(false);
const [chunkParagraphs, setChunkParagraphs] = useState(40);
const [chunkChars, setChunkChars] = useState(8000);
// Job state
const [currentJobId, setCurrentJobId] = useState(null);
const [jobStatus, setJobStatus] = useState(null); // 'pending', 'running', 'completed', 'failed'
// Latest progress payload received from the stream (null until the first one arrives)
const [progress, setProgress] = useState(null);
const [isSubmitting, setIsSubmitting] = useState(false);
// Result state
// Paragraphs are stored as { index, content, type } objects
const [paragraphs, setParagraphs] = useState([]);
const [markedText, setMarkedText] = useState('');
// { verified, originalHash, processedHash } once a result has been fetched
const [integrityStatus, setIntegrityStatus] = useState(null);
// Index of the paragraph whose type is currently being edited, or null
const [editingIndex, setEditingIndex] = useState(null);
// View mode
const [viewMode, setViewMode] = useState('list'); // 'list' or 'raw'
const [usage, setUsage] = useState(null);
// Check if coming from spec generator with a spec
// (both values are read from router location state and default to null)
const selectedSpec = location.state?.specJson || null;
const specName = location.state?.specName || null;
// Mount: load the usage data once. Unmount: close the SSE stream if one is open.
useEffect(() => {
  loadUsage();

  const closeStream = () => {
    eventSourceRef.current?.close();
  };
  return closeStream;
}, []);
/**
 * Fetch usage data from the API and store it in the `usage` state.
 * Failures are logged to the console and otherwise ignored.
 */
const loadUsage = async () => {
  try {
    const { data } = await wordFormatterAPI.getUsage();
    setUsage(data);
  } catch (err) {
    console.error('Load usage failed:', err);
  }
};
/**
 * File handling: validate the first entry of `e.target.files`, if there is one.
 *
 * @param {{ target: { files?: ArrayLike<File> } }} e - Change event carrying the chosen files.
 */
const handleFileChange = (e) => {
  const picked = e.target.files?.[0];
  if (!picked) {
    return;
  }
  validateAndSetFile(picked);
};
/**
 * Accept a file when its MIME type or its extension is supported and it is
 * no larger than 10 MB; otherwise show an error toast and leave state alone.
 *
 * @param {File} selectedFile - Candidate file (only `name`, `type` and `size` are read).
 */
const validateAndSetFile = (selectedFile) => {
  const acceptedMimeTypes = [
    'text/plain',
    'text/markdown',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
  ];
  const acceptedExtensions = ['.txt', '.md', '.docx'];
  const maxBytes = 10 * 1024 * 1024;

  const { name, type, size } = selectedFile;
  // For a name without a dot, lastIndexOf returns -1 and substring(-1) yields
  // the whole name, which never matches an accepted extension.
  const extension = name.substring(name.lastIndexOf('.')).toLowerCase();

  const typeAccepted = acceptedMimeTypes.includes(type);
  const extensionAccepted = acceptedExtensions.includes(extension);
  if (!(typeAccepted || extensionAccepted)) {
    toast.error('仅支持 .txt, .md, .docx 文件');
    return;
  }

  if (size > maxBytes) {
    toast.error('文件大小不能超过 10MB');
    return;
  }

  setFile(selectedFile);
  toast.success(`已选择文件: ${name}`);
};
/**
 * Shared drag handler: suppresses the default behaviour and toggles the
 * `dragActive` flag on for dragenter/dragover and off for dragleave.
 *
 * @param {DragEvent} e - The drag event.
 */
const handleDrag = (e) => {
  e.preventDefault();
  e.stopPropagation();

  switch (e.type) {
    case 'dragenter':
    case 'dragover':
      setDragActive(true);
      break;
    case 'dragleave':
      setDragActive(false);
      break;
    default:
      // Other event types leave the flag unchanged.
      break;
  }
};
/**
 * Drop handler: clears the drag highlight and validates the first dropped
 * file, if any.
 *
 * @param {DragEvent} e - The drop event.
 */
const handleDrop = (e) => {
  e.preventDefault();
  e.stopPropagation();
  setDragActive(false);

  const dropped = e.dataTransfer.files?.[0];
  if (dropped) {
    validateAndSetFile(dropped);
  }
};
/**
 * Start preprocessing: validate the current input, clear the previous run's
 * state, submit the file or text to the API and open the progress stream.
 *
 * Shows an error toast (and resets the job status) when validation or the
 * submit request fails. `isSubmitting` is always cleared at the end.
 */
const handleStartPreprocess = async () => {
  if (inputMode === 'file' && !file) {
    toast.error('请选择文件');
    return;
  }
  if (inputMode === 'text' && !text.trim()) {
    toast.error('请输入文本内容');
    return;
  }

  try {
    setIsSubmitting(true);
    setJobStatus('pending');
    // Clear everything left over from a previous run. Without resetting
    // `progress`, the new job would render the old job's last progress
    // payload while it is still pending.
    setProgress(null);
    setParagraphs([]);
    setMarkedText('');
    setIntegrityStatus(null);
    setEditingIndex(null);

    const options = { chunkParagraphs, chunkChars };
    const response =
      inputMode === 'file'
        ? await wordFormatterAPI.preprocessFile(file, options)
        : await wordFormatterAPI.preprocessText(text, options);

    const jobId = response.data.job_id;
    setCurrentJobId(jobId);
    startSSE(jobId);
    toast.success('预处理任务已开始');
  } catch (error) {
    console.error('Start preprocess failed:', error);
    toast.error(error.response?.data?.detail || '启动预处理失败');
    setJobStatus(null);
  } finally {
    setIsSubmitting(false);
  }
};
/**
 * SSE connection: open the progress stream for a job and wire up its events.
 *
 * Any previously open stream is closed first. Handles:
 * - default `message` events, forwarded to `handleSSEData`;
 * - `progress` events, which mark the job running and store the payload;
 * - `completed` events, which fetch the result and refresh usage;
 * - `error` events, both server-sent (carrying a JSON payload) and
 *   connection-level (no payload).
 *
 * @param {string} jobId - Identifier of the job to follow.
 */
const startSSE = (jobId) => {
  if (eventSourceRef.current) {
    eventSourceRef.current.close();
  }

  const url = wordFormatterAPI.getPreprocessStreamUrl(jobId);
  const es = new EventSource(url);
  eventSourceRef.current = es;

  es.onmessage = (event) => {
    try {
      const data = JSON.parse(event.data);
      handleSSEData(data);
    } catch (e) {
      console.error('SSE parse error:', e);
    }
  };

  es.addEventListener('progress', (event) => {
    try {
      const data = JSON.parse(event.data);
      setJobStatus('running');
      setProgress(data);
    } catch (e) {
      console.error('SSE progress error:', e);
    }
  });

  // The payload of `completed` is not used, so completion must not depend
  // on it being valid JSON.
  es.addEventListener('completed', () => {
    setJobStatus('completed');
    fetchResult(jobId);
    toast.success('文章预处理完成!');
    loadUsage();
    es.close();
  });

  // A single handler covers both kinds of "error" event. Registering a
  // separate listener that closes the stream would run first and make a
  // readyState check in `onerror` skip the fallback below.
  es.onerror = (event) => {
    es.close();

    if (typeof event.data === 'string') {
      // Server-sent `error` event: the job itself failed.
      let message;
      try {
        message = JSON.parse(event.data)?.message;
      } catch (e) {
        console.error('SSE error event:', e);
      }
      setJobStatus('failed');
      toast.error(message ? `预处理失败: ${message}` : '预处理失败');
      return;
    }

    // Connection-level error: try to fetch the result after a short delay.
    // If the job is still running, fetchResult leaves the state untouched.
    console.log('SSE connection error, will retry fetching result...');
    setTimeout(() => {
      // Skip the fallback when this stream is no longer the current one
      // (e.g. another job was started in the meantime).
      if (eventSourceRef.current === es) {
        fetchResult(jobId);
      }
    }, 2000);
  };
};
/**
 * Apply a payload received on the default SSE `message` channel.
 *
 * @param {{ status?: string, progress?: number } | null} data - Parsed event payload.
 */
const handleSSEData = (data) => {
  // A literal JSON `null` payload carries nothing to apply.
  if (data == null) {
    return;
  }
  if (data.status) {
    setJobStatus(data.status);
  }
  // Compare against null/undefined rather than truthiness: a progress of 0
  // is a legitimate first update and must not be dropped.
  if (data.progress != null) {
    setProgress(data);
  }
};
/**
 * Fetch the preprocessing result for a job and load it into state.
 *
 * On success the paragraphs, marked text and integrity info are stored and
 * the job is marked completed. A response with `success` false marks the
 * job failed. Request errors are handled by HTTP status:
 * - 404: the job is gone; status is reset and a toast is shown;
 * - 400: the job has not finished yet; the current state is kept;
 * - anything else: the job is marked failed and a toast is shown, so the
 *   UI does not stay stuck without feedback.
 *
 * @param {string} jobId - Identifier of the job whose result to fetch.
 */
const fetchResult = async (jobId) => {
  try {
    // The backend returns the result fields directly on response.data.
    const { data } = await wordFormatterAPI.getPreprocessResult(jobId);

    if (!data.success) {
      setJobStatus('failed');
      toast.error(data.error || '预处理失败');
      return;
    }

    // Field mapping: backend text/paragraph_type -> frontend content/type.
    const paragraphsData = (data.paragraphs || []).map((p) => ({
      index: p.index,
      content: p.text,
      type: p.paragraph_type || 'body',
    }));
    setParagraphs(paragraphsData);
    setMarkedText(data.marked_text || '');
    setIntegrityStatus({
      verified: data.integrity_check_passed,
      originalHash: data.original_hash,
      processedHash: data.processed_hash,
    });
    setJobStatus('completed');
  } catch (error) {
    console.error('Fetch result failed:', error);
    const status = error.response?.status;
    if (status === 404) {
      toast.error('任务不存在或已过期');
      setJobStatus(null);
    } else if (status === 400) {
      // The job has not finished yet; keep the current state.
      console.log('任务尚未完成,稍后重试');
    } else {
      // Any other failure (server error, network error, malformed
      // response): surface it instead of only logging.
      setJobStatus('failed');
      toast.error(error.response?.data?.detail || '获取结果失败');
    }
  }
};
/**
 * Edit paragraph type: replace the type of the paragraph at `index`, close
 * the type editor and rebuild the marked text from the updated list.
 *
 * @param {number} index - Position of the paragraph in the `paragraphs` array.
 * @param {string} newType - New paragraph type key.
 */
const handleTypeChange = (index, newType) => {
  // Work on a copy so the array held in state is never mutated.
  const next = Array.from(paragraphs);
  next[index] = Object.assign({}, next[index], { type: newType });

  setParagraphs(next);
  setEditingIndex(null);
  regenerateMarkedText(next);
};
/**
 * Rebuild the marked text from a paragraph list and store it in state.
 *
 * Each paragraph contributes a newline followed by its content; the pieces
 * are joined with a blank line.
 *
 * NOTE(review): the paragraph `type` is never written into the output, so a
 * type change does not alter the marked text. A per-paragraph type marker
 * looks like it is missing in front of the newline — confirm the expected
 * marker format against the backend's `marked_text`.
 *
 * @param {Array<{ content: string }>} updatedParagraphs - Paragraphs to serialise.
 */
const regenerateMarkedText = (updatedParagraphs) => {
  const rebuilt = updatedParagraphs
    .map(({ content }) => `\n${content}`)
    .join('\n\n');
  setMarkedText(rebuilt);
};
/**
 * Export marked text: download the current marked text as a Markdown file.
 *
 * The download is named after the selected file with its extension replaced
 * by `_marked.md`. A name without an extension gets the suffix appended.
 * When there is no file, or nothing is left once the extension is removed,
 * the name falls back to `article_marked.md`.
 */
const handleExportMarkdown = () => {
  if (!markedText) {
    toast.error('没有可导出的内容');
    return;
  }

  // Strip the extension first and always append the suffix; replacing the
  // extension in a single step leaves extension-less names unchanged.
  const baseName = file?.name?.replace(/\.[^.]+$/, '');
  const downloadName = baseName ? `${baseName}_marked.md` : 'article_marked.md';

  const blob = new Blob([markedText], { type: 'text/markdown;charset=utf-8' });
  const url = URL.createObjectURL(blob);
  const a = document.createElement('a');
  a.href = url;
  a.download = downloadName;
  document.body.appendChild(a);
  try {
    a.click();
  } finally {
    // Always detach the temporary link and release the object URL.
    document.body.removeChild(a);
    URL.revokeObjectURL(url);
  }
  toast.success('已导出 Markdown 文件');
};
/**
 * Navigate to format page: hand the marked text and the selected spec to the
 * `/word-formatter` route via router state. Shows an error toast when there
 * is no marked text yet.
 */
const handleGoToFormat = () => {
  if (markedText) {
    const routeState = {
      preprocessedText: markedText,
      specJson: selectedSpec,
      specName,
    };
    navigate('/word-formatter', { state: routeState });
    return;
  }
  toast.error('请先完成预处理');
};
/**
 * Reset form: close the progress stream and return the input, job and
 * result state to their initial values.
 */
const handleReset = () => {
  if (eventSourceRef.current) {
    eventSourceRef.current.close();
    // Drop the closed stream so the ref no longer points at a dead connection.
    eventSourceRef.current = null;
  }

  setFile(null);
  setText('');
  setCurrentJobId(null);
  setJobStatus(null);
  setProgress(null);
  setParagraphs([]);
  setMarkedText('');
  setIntegrityStatus(null);
  // Close any open type editor; otherwise the stale index would apply to
  // the paragraphs of the next result.
  setEditingIndex(null);

  if (fileInputRef.current) {
    fileInputRef.current.value = '';
  }
};
/**
 * Delete current job: remove the job through the API, then reset the form.
 * Does nothing when there is no current job; shows an error toast when the
 * deletion (or the reset that follows it) throws.
 */
const handleDeleteJob = async () => {
  if (!currentJobId) {
    return;
  }

  try {
    await wordFormatterAPI.deletePreprocessJob(currentJobId);
    handleReset();
    toast.success('任务已删除');
  } catch (err) {
    console.error('Delete job failed:', err);
    toast.error('删除任务失败');
  }
};
// Render paragraph type badge.
//
// Takes the paragraph's type key and its index in the list. When the index
// matches `editingIndex`, the editing branch is returned, which iterates over
// every entry of PARAGRAPH_TYPES; otherwise the plain badge branch is returned.
//
// NOTE(review): the JSX markup inside both `return (...)` expressions appears
// to be missing from this copy of the file — restore it from the original
// source before relying on this function.
const renderTypeBadge = (type, index) => {
// Unknown type keys fall back to the `body` configuration.
const config = PARAGRAPH_TYPES[type] || PARAGRAPH_TYPES.body;
const IconComponent = config.icon;
const isEditing = editingIndex === index;
if (isEditing) {
return (
选择段落类型
{Object.entries(PARAGRAPH_TYPES).map(([key, cfg]) => {
const Icon = cfg.icon;
return (
);
})}
);
}
return (
);
};
// Render progress bar.
//
// Builds the percentage and the status message shown while a job is pending
// or running, based on the latest `progress` payload.
//
// NOTE(review): the JSX markup inside the `return (...)` expressions appears
// to be missing from this copy of the file.
const renderProgress = () => {
// The backend sends: { phase, progress (0-1), message, detail }
// detail format: "分块 x/y" or null
// Show a loading state while no progress data has arrived yet
if (!progress) {
return (
);
}
// progress.progress is a 0-1 fraction; a missing value is treated as 0
const percentage = Math.round((progress.progress || 0) * 100);
// Use detail, when present, as the chunk information
let chunkInfo = '';
if (progress.detail) {
chunkInfo = progress.detail;
}
// Default messages keyed by the backend phase value
const phaseMessages = {
splitting: '正在分割文章...',
marking: `正在识别段落类型${chunkInfo ? ` (${chunkInfo})` : ''}`,
validating: '正在验证完整性...',
completed: '处理完成',
error: '处理出错',
};
// Precedence: explicit backend message, then the phase default, then a generic text
const displayMessage = progress.message || phaseMessages[progress.phase] || '处理中...';
return (
{displayMessage}
{percentage}%
);
};
// Page markup: header (usage and selected spec), workflow indicator, the
// input panel (file drop zone or text entry, configuration, actions) and the
// result panel (progress, result list or raw marked text, empty and failed
// states).
//
// NOTE(review): the JSX element tags appear to be missing from this copy of
// the file; only text nodes, expressions and JSX comments remain. Restore the
// markup from the original source before editing this section.
return (
{/* Header */}
{usage && (
使用量: {usage.used}/{usage.limit}
)}
{selectedSpec && (
已选规范: {specName || '自定义'}
)}
{/* Workflow indicator */}
1. 生成规范
2. 文章预处理
3. 生成 Word
{/* Left Panel - Input */}
{/* Input Mode Toggle */}
{inputMode === 'file' ? (
{file ? (
{file.name}
{(file.size / 1024).toFixed(1)} KB
) : (
<>
拖拽文件到这里,或点击选择
支持 .txt, .md, .docx (最大 10MB)
>
)}
) : (
{/* Configuration */}
{showConfig && (
)}
{/* Action Buttons */}
{/* Right Panel - Result */}
{/* Progress */}
{(jobStatus === 'running' || jobStatus === 'pending') && renderProgress()}
{/* Result Header */}
{jobStatus === 'completed' && paragraphs.length > 0 && (
预处理结果
{/* Integrity Status */}
{integrityStatus && (
{integrityStatus.verified ? (
<>
内容完整性验证通过 - 原文未被修改
>
) : (
<>
警告:内容可能已被修改,请仔细检查
>
)}
)}
{/* Statistics */}
{paragraphs.filter((p) => p.type.startsWith('heading')).length}
标题
{paragraphs.filter((p) => p.type === 'body').length}
正文
{/* Content View */}
{viewMode === 'list' ? (
{paragraphs.map((para, index) => (
{index + 1}
{renderTypeBadge(para.type, index)}
{para.content}
))}
) : (
{markedText}
)}
{/* Action Buttons */}
)}
{/* Empty state */}
{!jobStatus && (
等待预处理
上传文件或粘贴文本后,点击"开始预处理"按钮
AI 将自动识别并标记段落类型
)}
{/* Failed state */}
{jobStatus === 'failed' && (
预处理失败
请检查文件格式或网络连接后重试
)}
);
};
export default ArticlePreprocessorPage;