comfyuiman committed on
Commit
cd3f86a
·
verified ·
1 Parent(s): 9722255

Upload 20 files

Browse files
.dockerignore ADDED
@@ -0,0 +1 @@
 
 
1
+ ��^�j�W�����$z����b�
.env.local ADDED
@@ -0,0 +1 @@
 
 
1
+ GEMINI_API_KEY=PLACEHOLDER_API_KEY
.gitignore ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Logs
2
+ logs
3
+ *.log
4
+ npm-debug.log*
5
+ yarn-debug.log*
6
+ yarn-error.log*
7
+ pnpm-debug.log*
8
+ lerna-debug.log*
9
+
10
+ node_modules
11
+ dist
12
+ dist-ssr
13
+ *.local
14
+
15
+ # Editor directories and files
16
+ .vscode/*
17
+ !.vscode/extensions.json
18
+ .idea
19
+ .DS_Store
20
+ *.suo
21
+ *.ntvs*
22
+ *.njsproj
23
+ *.sln
24
+ *.sw?
App.tsx ADDED
@@ -0,0 +1,883 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React, { useState, useCallback, useMemo, useEffect, useRef } from 'react';
2
+ import type { MediaFile } from './types';
3
+ import { GenerationStatus } from './types';
4
+ import FileUploader from './components/FileUploader';
5
+ import MediaItem from './components/MediaItem';
6
+ import { generateCaption, refineCaption, checkCaptionQuality } from './services/geminiService';
7
+ import { generateCaptionQwen, refineCaptionQwen, checkQualityQwen } from './services/qwenService';
8
+ import { sendComfyPrompt } from './services/comfyService';
9
+ import { DownloadIcon, SparklesIcon, WandIcon, LoaderIcon, CopyIcon, UploadCloudIcon, XIcon, CheckCircleIcon, AlertTriangleIcon, StopIcon, TrashIcon } from './components/Icons';
10
+ import { DEFAULT_COMFY_WORKFLOW } from './constants/defaultWorkflow';
11
+
12
+ declare const process: {
13
+ env: { API_KEY?: string; [key: string]: string | undefined; }
14
+ };
15
+
16
+ declare global {
17
+ interface AIStudio {
18
+ hasSelectedApiKey: () => Promise<boolean>;
19
+ openSelectKey: () => Promise<void>;
20
+ }
21
+ interface Window { JSZip: any; aistudio?: AIStudio; }
22
+ }
23
+
24
+ type ApiProvider = 'gemini' | 'qwen';
25
+ type OSType = 'windows' | 'linux';
26
+
27
+ const GEMINI_MODELS = [
28
+ { id: 'gemini-3-pro-preview', name: 'Gemini 3 Pro (High Quality)' },
29
+ { id: 'gemini-3-flash-preview', name: 'Gemini 3 Flash (Fast)' },
30
+ { id: 'gemini-2.5-pro-preview-09-2025', name: 'Gemini 2.5 Pro (Multimodal)' },
31
+ { id: 'gemini-2.5-flash-native-audio-preview-09-2025', name: 'Gemini 2.5 Flash (Multimedia Speed)' }
32
+ ];
33
+
34
+ const QWEN_MODELS = [
35
+ { id: 'thesby/Qwen3-VL-8B-NSFW-Caption-V4.5', name: 'Thesby Qwen 3 VL 8B NSFW Caption V4.5' },
36
+ { id: 'huihui-ai/Huihui-Qwen3-VL-8B-Instruct-abliterated', name: 'Huihui Qwen 3 VL 8B Abliterated (Uncensored)' },
37
+ { id: 'Qwen/Qwen3-VL-8B-Instruct-FP8', name: 'Qwen 3 VL 8B FP8' },
38
+ ];
39
+
40
+ const DEFAULT_BULK_INSTRUCTIONS = `Dont use ambiguous language "perhaps" for example. Describe EVERYTHING visible: characters, clothing, actions, background, objects, lighting, and camera angle. Refrain from using generic phrases like "character, male, figure of" and use specific terminology: "woman, girl, boy, man". Do not mention the art style.`;
41
+ const DEFAULT_REFINEMENT_INSTRUCTIONS = `Refine the caption to be more descriptive and cinematic. Ensure all colors and materials are mentioned.`;
42
+
43
+ const App: React.FC = () => {
44
+ // --- STATE ---
45
+ const [mediaFiles, setMediaFiles] = useState<MediaFile[]>([]);
46
+ const [triggerWord, setTriggerWord] = useState<string>('MyStyle');
47
+ const [apiProvider, setApiProvider] = useState<ApiProvider>('gemini');
48
+ const [geminiApiKey, setGeminiApiKey] = useState<string>(process.env.API_KEY || '');
49
+ const [geminiModel, setGeminiModel] = useState<string>(GEMINI_MODELS[0].id);
50
+ const [hasSelectedKey, setHasSelectedKey] = useState<boolean>(false);
51
+
52
+ // Qwen Options
53
+ const [qwenEndpoint, setQwenEndpoint] = useState<string>('');
54
+ const [useCustomQwenModel, setUseCustomQwenModel] = useState<boolean>(false);
55
+ const [customQwenModelId, setCustomQwenModelId] = useState<string>('');
56
+ const [qwenModel, setQwenModel] = useState<string>(QWEN_MODELS[0].id);
57
+ const [qwenOsType, setQwenOsType] = useState<OSType>(() => navigator.userAgent.includes("Windows") ? 'windows' : 'linux');
58
+ const [qwenInstallDir, setQwenInstallDir] = useState<string>(() => navigator.userAgent.includes("Windows") ? 'C:\\AI\\qwen_local' : '/home/user/ai/qwen_local');
59
+ const [qwenMaxTokens, setQwenMaxTokens] = useState<number>(8192);
60
+ const [qwen8Bit, setQwen8Bit] = useState<boolean>(false);
61
+ const [qwenEager, setQwenEager] = useState<boolean>(false);
62
+ const [qwenVideoFrameCount, setQwenVideoFrameCount] = useState<number>(8);
63
+
64
+ // Offline Local Snapshot Options
65
+ const [useOfflineSnapshot, setUseOfflineSnapshot] = useState<boolean>(false);
66
+ const [snapshotPath, setSnapshotPath] = useState<string>('');
67
+ const [virtualModelName, setVirtualModelName] = useState<string>('thesby/Qwen3-VL-8B-NSFW-Caption-V4.5');
68
+
69
+ // ComfyUI Options
70
+ const [isComfyEnabled, setIsComfyEnabled] = useState<boolean>(false);
71
+ const [comfyUrl, setComfyUrl] = useState<string>('http://localhost:5000');
72
+ const [comfyWorkflow, setComfyWorkflow] = useState<any>(DEFAULT_COMFY_WORKFLOW);
73
+ const [comfyWorkflowName, setComfyWorkflowName] = useState<string>('Default Workflow');
74
+ const [comfySeed, setComfySeed] = useState<number>(-1);
75
+ const [comfySteps, setComfySteps] = useState<number>(4);
76
+ const [activePreviewId, setActivePreviewId] = useState<string | null>(null);
77
+
78
+ // Secure Bridge Options
79
+ const [useSecureBridge, setUseSecureBridge] = useState<boolean>(false);
80
+ const [isFirstTimeBridge, setIsFirstTimeBridge] = useState<boolean>(false);
81
+ const [bridgeOsType, setBridgeOsType] = useState<OSType>(() => navigator.userAgent.includes("Windows") ? 'windows' : 'linux');
82
+ const [bridgeInstallPath, setBridgeInstallPath] = useState<string>('/mnt/Goon/captioner');
83
+
84
+ // Queue and Performance
85
+ const [useRequestQueue, setUseRequestQueue] = useState<boolean>(true);
86
+ const [concurrentTasks, setConcurrentTasks] = useState<number>(1);
87
+ const [isQueueRunning, setIsQueueRunning] = useState<boolean>(false);
88
+
89
+ // Dataset / Instructions
90
+ const [bulkGenerationInstructions, setBulkGenerationInstructions] = useState<string>(DEFAULT_BULK_INSTRUCTIONS);
91
+ const [bulkRefinementInstructions, setBulkRefinementInstructions] = useState<string>(DEFAULT_REFINEMENT_INSTRUCTIONS);
92
+ const [autofitTextareas, setAutofitTextareas] = useState<boolean>(false);
93
+ const [showSideBySidePreview, setShowSideBySidePreview] = useState<boolean>(false);
94
+ const [datasetPrefix, setDatasetPrefix] = useState<string>('item');
95
+ const [isCharacterTaggingEnabled, setIsCharacterTaggingEnabled] = useState<boolean>(false);
96
+ const [characterShowName, setCharacterShowName] = useState<string>('');
97
+ const [isExporting, setIsExporting] = useState<boolean>(false);
98
+
99
+ const abortControllerRef = useRef<AbortController>(new AbortController());
100
+
101
+ // --- EFFECTS ---
102
// One-time initialisation on mount.
//
// Runs exactly once: depending on `qwenEndpoint` would re-seed the default every
// time the user empties the field (making it impossible to clear on http pages)
// and would re-query the AI Studio host on every endpoint edit.
useEffect(() => {
  if (window.aistudio) {
    // Ask the AI Studio host (when present) whether an API key is already selected.
    // A failed lookup is logged and leaves `hasSelectedKey` untouched.
    window.aistudio
      .hasSelectedApiKey()
      .then(setHasSelectedKey)
      .catch((err: unknown) => {
        console.warn('Could not query AI Studio key selection:', err);
      });
  }

  // Seed the Qwen endpoint only while it is still empty. On https pages the
  // seed is an empty string, i.e. no default is applied.
  const isHttps = window.location.protocol === 'https:';
  setQwenEndpoint(current => current || (isHttps ? '' : 'http://localhost:8000/v1'));
}, []);
111
+
112
+ // Handle Modal Keyboard Navigation
113
+ useEffect(() => {
114
+ const handleKeyDown = (e: KeyboardEvent) => {
115
+ if (!activePreviewId) return;
116
+ if (e.key === 'ArrowRight') handleNextPreview();
117
+ if (e.key === 'ArrowLeft') handlePrevPreview();
118
+ if (e.key === 'Escape') setActivePreviewId(null);
119
+ };
120
+ window.addEventListener('keydown', handleKeyDown);
121
+ return () => window.removeEventListener('keydown', handleKeyDown);
122
+ }, [activePreviewId, mediaFiles]);
123
+
124
+ // --- MEMOIZED VALUES ---
125
+ const hasValidConfig = useMemo(() => {
126
+ if (apiProvider === 'gemini') return !!geminiApiKey;
127
+ return qwenEndpoint !== '';
128
+ }, [apiProvider, geminiApiKey, qwenEndpoint]);
129
+
130
+ const selectedFiles = useMemo(() => {
131
+ return (mediaFiles || []).filter(mf => mf.isSelected);
132
+ }, [mediaFiles]);
133
+ const currentPreviewItem = useMemo(() => (mediaFiles || []).find(m => m.id === activePreviewId), [mediaFiles, activePreviewId]);
134
+
135
+ const qwenEffectiveModel = useMemo(() => {
136
+ if (useOfflineSnapshot) return virtualModelName;
137
+ return useCustomQwenModel ? customQwenModelId : qwenModel;
138
+ }, [useOfflineSnapshot, virtualModelName, useCustomQwenModel, customQwenModelId, qwenModel]);
139
+
140
+ const qwenStartCommand = useMemo(() => {
141
+ const isWin = qwenOsType === 'windows';
142
+ const path = qwenInstallDir.replace(/[\\/]+$/, '');
143
+
144
+ // Model logic for command
145
+ const modelToLoad = useOfflineSnapshot ? snapshotPath : (useCustomQwenModel ? customQwenModelId : qwenModel);
146
+
147
+ const activate = isWin ? `venv\\Scripts\\activate` : `source venv/bin/activate`;
148
+ const python = isWin ? `python` : `python3`;
149
+ const offlineEnv = isWin ? `set HF_HUB_OFFLINE=1` : `export HF_HUB_OFFLINE=1`;
150
+
151
+ let args = `--model "${modelToLoad}" --max-model-len ${qwenMaxTokens}`;
152
+ if (useOfflineSnapshot) {
153
+ args += ` --served-model-name "${virtualModelName}"`;
154
+ }
155
+ if (qwen8Bit) args += ` --load-format bitsandbytes --quantization bitsandbytes`;
156
+ if (qwenEager) args += ` --enforce-eager`;
157
+
158
+ const baseCmd = isWin
159
+ ? `cd /d "${path}" && ${useOfflineSnapshot ? `${offlineEnv} && ` : ''}${activate} && ${python} -m vllm.entrypoints.openai.api_server ${args}`
160
+ : `cd "${path}" && ${useOfflineSnapshot ? `${offlineEnv} && ` : ''}${activate} && ${python} -m vllm.entrypoints.openai.api_server ${args}`;
161
+
162
+ return baseCmd;
163
+ }, [qwenOsType, qwenInstallDir, useCustomQwenModel, customQwenModelId, qwenModel, qwenMaxTokens, qwen8Bit, qwenEager, useOfflineSnapshot, snapshotPath, virtualModelName]);
164
+
165
+ const bridgeStartCommand = useMemo(() => {
166
+ const isWindows = bridgeOsType === 'windows';
167
+ const path = bridgeInstallPath.replace(/[\\/]+$/, '');
168
+ const activateCmd = isWindows ? `call venv\\Scripts\\activate` : `source venv/bin/activate`;
169
+ const pipCmd = `pip install flask flask-cors requests`;
170
+ const setupCmd = isWindows
171
+ ? `python -m venv venv && ${activateCmd} && ${pipCmd}`
172
+ : `python3 -m venv venv && ${activateCmd} && ${pipCmd}`;
173
+ return isWindows
174
+ ? `cd /d "${path}" && ${isFirstTimeBridge ? `${setupCmd} && ` : ''}${activateCmd} && python bridge.py`
175
+ : `cd "${path}" && ${isFirstTimeBridge ? `${setupCmd} && ` : ''}${activateCmd} && python3 bridge.py`;
176
+ }, [bridgeInstallPath, bridgeOsType, isFirstTimeBridge]);
177
+
178
+ const isTunnelRequired = useMemo(() => {
179
+ return window.location.protocol === 'https:' && (qwenEndpoint.includes('localhost') || qwenEndpoint.includes('127.0.0.1'));
180
+ }, [qwenEndpoint]);
181
+
182
+ // --- HANDLERS ---
183
+ const handleSelectApiKey = async () => {
184
+ if (window.aistudio) {
185
+ await window.aistudio.openSelectKey();
186
+ setHasSelectedKey(true);
187
+ }
188
+ };
189
+
190
+ const updateFile = useCallback((id: string, updates: Partial<MediaFile>) => {
191
+ setMediaFiles(prev => (prev || []).map(mf => (mf.id === id ? { ...mf, ...updates } : mf)));
192
+ }, []);
193
+
194
// Appends newly provided files to the media list.
//
// Only files whose MIME type starts with `image/` or `video/` are kept. Each
// accepted file gets a blob preview URL and a fresh entry in the IDLE state.
// The function stays `async` so callers that await it keep working, but nothing
// inside needs awaiting, so the entries are built synchronously.
const handleFilesAdded = useCallback(async (files: File[]) => {
  const newMediaFiles = files
    .filter(file => file.type.startsWith('image/') || file.type.startsWith('video/'))
    .map(file => ({
      // Random suffix keeps ids distinct when the same file name is added twice.
      id: `${file.name}-${Math.random()}`,
      file,
      previewUrl: URL.createObjectURL(file),
      caption: '',
      status: GenerationStatus.IDLE,
      isSelected: false,
      customInstructions: '',
      comfyStatus: 'idle'
    } as MediaFile));

  // Nothing usable was provided: skip the state update and the re-render.
  if (newMediaFiles.length === 0) return;

  setMediaFiles(prev => [...(prev || []), ...newMediaFiles]);
}, []);
208
+
209
+ const handleCheckQuality = useCallback(async (id: string) => {
210
+ const fileToProcess = (mediaFiles || []).find(mf => mf.id === id);
211
+ if (!hasValidConfig || !fileToProcess || !fileToProcess.caption) return;
212
+
213
+ updateFile(id, { status: GenerationStatus.CHECKING, errorMessage: undefined });
214
+
215
+ try {
216
+ const score = apiProvider === 'gemini'
217
+ ? await checkCaptionQuality(fileToProcess.file, fileToProcess.caption, abortControllerRef.current.signal, geminiApiKey, geminiModel)
218
+ : await checkQualityQwen('', qwenEndpoint, qwenEffectiveModel, fileToProcess.file, fileToProcess.caption, qwenVideoFrameCount, abortControllerRef.current.signal);
219
+
220
+ updateFile(id, { qualityScore: score, status: GenerationStatus.SUCCESS });
221
+ } catch (err: any) {
222
+ if (err.name === 'AbortError' || err.message === 'AbortError') {
223
+ updateFile(id, { status: GenerationStatus.IDLE, errorMessage: "Stopped by user" });
224
+ } else {
225
+ updateFile(id, { status: GenerationStatus.ERROR, errorMessage: err.message });
226
+ }
227
+ }
228
+ }, [mediaFiles, apiProvider, qwenEndpoint, qwenEffectiveModel, qwenVideoFrameCount, hasValidConfig, updateFile, geminiApiKey, geminiModel]);
229
+
230
+ const handleGenerateCaption = useCallback(async (id: string, itemInstructions?: string) => {
231
+ const fileToProcess = (mediaFiles || []).find(mf => mf.id === id);
232
+ if (!hasValidConfig || !fileToProcess) return;
233
+
234
+ updateFile(id, { status: GenerationStatus.GENERATING, errorMessage: undefined, qualityScore: undefined });
235
+
236
+ const combinedInstructions = `${bulkGenerationInstructions}\n\n${itemInstructions || ''}`.trim();
237
+
238
+ try {
239
+ const caption = apiProvider === 'gemini'
240
+ ? await generateCaption(fileToProcess.file, triggerWord, combinedInstructions, isCharacterTaggingEnabled, characterShowName, abortControllerRef.current.signal, geminiApiKey, geminiModel)
241
+ : await generateCaptionQwen('', qwenEndpoint, qwenEffectiveModel, fileToProcess.file, triggerWord, combinedInstructions, isCharacterTaggingEnabled, characterShowName, qwenVideoFrameCount, abortControllerRef.current.signal);
242
+
243
+ updateFile(id, { caption, status: GenerationStatus.SUCCESS });
244
+ } catch (err: any) {
245
+ if (err.name === 'AbortError' || err.message === 'AbortError') {
246
+ updateFile(id, { status: GenerationStatus.IDLE, errorMessage: "Stopped by user" });
247
+ } else {
248
+ updateFile(id, { status: GenerationStatus.ERROR, errorMessage: err.message });
249
+ }
250
+ }
251
+ }, [mediaFiles, triggerWord, apiProvider, qwenEndpoint, qwenEffectiveModel, qwenVideoFrameCount, bulkGenerationInstructions, isCharacterTaggingEnabled, characterShowName, hasValidConfig, updateFile, geminiApiKey, geminiModel]);
252
+
253
+ const handleRefineCaptionItem = useCallback(async (id: string, itemInstructions?: string) => {
254
+ const fileToProcess = (mediaFiles || []).find(mf => mf.id === id);
255
+ if (!hasValidConfig || !fileToProcess || !fileToProcess.caption) return;
256
+
257
+ updateFile(id, { status: GenerationStatus.GENERATING, errorMessage: undefined });
258
+
259
+ const combinedInstructions = `${bulkRefinementInstructions}\n\n${itemInstructions || ''}`.trim();
260
+
261
+ try {
262
+ const caption = apiProvider === 'gemini'
263
+ ? await refineCaption(fileToProcess.file, fileToProcess.caption, combinedInstructions, abortControllerRef.current.signal, geminiApiKey, geminiModel)
264
+ : await refineCaptionQwen('', qwenEndpoint, qwenEffectiveModel, fileToProcess.file, fileToProcess.caption, combinedInstructions, qwenVideoFrameCount, abortControllerRef.current.signal);
265
+
266
+ updateFile(id, { caption, status: GenerationStatus.SUCCESS });
267
+ } catch (err: any) {
268
+ if (err.name === 'AbortError' || err.message === 'AbortError') {
269
+ updateFile(id, { status: GenerationStatus.IDLE, errorMessage: "Stopped by user" });
270
+ } else {
271
+ updateFile(id, { status: GenerationStatus.ERROR, errorMessage: err.message });
272
+ }
273
+ }
274
+ }, [mediaFiles, apiProvider, qwenEndpoint, qwenEffectiveModel, qwenVideoFrameCount, bulkRefinementInstructions, hasValidConfig, updateFile, geminiApiKey, geminiModel]);
275
+
276
+ // --- QUEUE CONTROLLER ---
277
// Runs the given task factories with at most `concurrentTasks` in flight.
//
// - tasks: factories that each start one unit of work and resolve when done.
// - Stops launching new tasks once the abort signal that was current when the
//   queue started is aborted; tasks already in flight are awaited before the
//   running flag is cleared.
// - A task that rejects is logged and does not stop the rest of the queue.
const runTasksInQueue = async (tasks: (() => Promise<void>)[]) => {
  // Pin the signal up front. handleStopTasks replaces abortControllerRef.current
  // with a fresh controller immediately after aborting, so re-reading the ref
  // inside the loop would always see a non-aborted signal.
  const { signal } = abortControllerRef.current;

  // Guard against 0 / NaN / fractional values coming from the settings input.
  const limit = Math.max(1, Math.floor(concurrentTasks) || 1);
  const inFlight = new Set<Promise<void>>();

  setIsQueueRunning(true);
  try {
    for (const task of tasks) {
      if (signal.aborted) break;

      // Track the settled-safe promise so neither race() nor all() can reject
      // and no derived promise is left unhandled.
      const tracked: Promise<void> = task()
        .catch((err: unknown) => {
          console.error('Queued task failed:', err);
        })
        .finally(() => {
          inFlight.delete(tracked);
        });
      inFlight.add(tracked);

      // Pool is full: wait for any slot to free up before launching more.
      if (inFlight.size >= limit) {
        await Promise.race(inFlight);
      }
    }
    await Promise.all(inFlight);
  } finally {
    // Always clear the flag, even if something above threw unexpectedly.
    setIsQueueRunning(false);
  }
};
292
+
293
+ const handleBulkGenerate = () => {
294
+ const tasks = selectedFiles.map(file => () => handleGenerateCaption(file.id, file.customInstructions));
295
+ if (useRequestQueue) {
296
+ runTasksInQueue(tasks);
297
+ } else {
298
+ tasks.forEach(t => t());
299
+ }
300
+ };
301
+
302
+ const handleBulkRefine = () => {
303
+ const tasks = selectedFiles.map(file => () => handleRefineCaptionItem(file.id, file.customInstructions));
304
+ if (useRequestQueue) {
305
+ runTasksInQueue(tasks);
306
+ } else {
307
+ tasks.forEach(t => t());
308
+ }
309
+ };
310
+
311
+ const handleBulkQualityCheck = () => {
312
+ const tasks = selectedFiles.map(file => () => handleCheckQuality(file.id));
313
+ if (useRequestQueue) {
314
+ runTasksInQueue(tasks);
315
+ } else {
316
+ tasks.forEach(t => t());
317
+ }
318
+ };
319
+
320
+ const handleClearWorkflow = useCallback(() => {
321
+ setComfyWorkflow(DEFAULT_COMFY_WORKFLOW);
322
+ setComfyWorkflowName('Default Workflow');
323
+ }, []);
324
+
325
+ const handleComfyPreview = useCallback(async (id: string) => {
326
+ const item = (mediaFiles || []).find(m => m.id === id);
327
+ if (!item || !comfyWorkflow || !comfyUrl) return;
328
+
329
+ updateFile(id, { comfyStatus: 'generating', comfyErrorMessage: undefined });
330
+ try {
331
+ const previewUrl = await sendComfyPrompt(comfyUrl, comfyWorkflow, item.caption, comfySeed, comfySteps, useSecureBridge, abortControllerRef.current.signal);
332
+ updateFile(id, { comfyPreviewUrl: previewUrl, comfyStatus: 'success' });
333
+ } catch (err: any) {
334
+ if (err.name === 'AbortError' || err.message === 'Aborted') {
335
+ updateFile(id, { comfyStatus: 'idle', comfyErrorMessage: "Stopped" });
336
+ } else {
337
+ updateFile(id, { comfyStatus: 'error', comfyErrorMessage: err.message });
338
+ }
339
+ }
340
+ }, [mediaFiles, comfyWorkflow, comfyUrl, comfySeed, comfySteps, useSecureBridge, updateFile]);
341
+
342
+ const handleBulkPreview = () => {
343
+ selectedFiles.forEach(file => handleComfyPreview(file.id));
344
+ };
345
+
346
// Removes every selected item from the media list and releases the blob
// preview URL that was created for it when the file was added.
const handleDeleteSelected = useCallback(() => {
  setMediaFiles(prev => {
    const current = prev || [];

    // revokeObjectURL is idempotent, so this stays safe even if React invokes
    // the updater more than once (e.g. StrictMode in development).
    current.forEach(mf => {
      if (mf.isSelected) URL.revokeObjectURL(mf.previewUrl);
    });

    return current.filter(mf => !mf.isSelected);
  });
}, []);
352
+
353
// Aborts all in-flight work and resets every busy item back to idle.
//
// The abort controller is replaced right after aborting so that work started
// later gets a fresh, non-aborted signal.
const handleStopTasks = () => {
  abortControllerRef.current.abort();
  abortControllerRef.current = new AbortController();
  setIsQueueRunning(false);

  setMediaFiles(prev => (prev || []).map(mf => {
    const captionBusy =
      mf.status === GenerationStatus.GENERATING || mf.status === GenerationStatus.CHECKING;
    const comfyBusy = mf.comfyStatus === 'generating';
    if (!captionBusy && !comfyBusy) return mf;

    // The two pipelines are independent: an item may be captioning and
    // rendering at once, so both are reset rather than only the first match.
    const updates: Partial<MediaFile> = {};
    if (captionBusy) {
      updates.status = GenerationStatus.IDLE;
      updates.errorMessage = "Stopped by user";
    }
    if (comfyBusy) {
      updates.comfyStatus = 'idle';
      updates.comfyErrorMessage = "Stopped";
    }
    return { ...mf, ...updates };
  }));
};
367
+
368
// Packs the selected items into a zip and triggers a browser download.
//
// Each item contributes two entries named `<prefix>_<n>`: the media file with
// its original extension and a `.txt` file holding its caption. Requires the
// global `window.JSZip`; alerts and bails out when it is missing.
const handleExportDataset = useCallback(async () => {
  if (selectedFiles.length === 0) return;

  const JSZip = window.JSZip;
  if (!JSZip) {
    alert("JSZip not loaded.");
    return;
  }

  setIsExporting(true);
  try {
    const zip = new JSZip();
    const prefix = datasetPrefix.trim() || 'item';

    selectedFiles.forEach((mf, idx) => {
      // Take the text after the last dot as the extension. Names with no dot,
      // a leading dot only, or a trailing dot fall back to 'dat' instead of
      // reusing the whole file name as the extension.
      const originalName = mf.file.name;
      const dotAt = originalName.lastIndexOf('.');
      const hasExt = dotAt > 0 && dotAt < originalName.length - 1;
      const fileExt = hasExt ? originalName.slice(dotAt + 1) : 'dat';

      const finalName = `${prefix}_${idx + 1}`;
      zip.file(`${finalName}.${fileExt}`, mf.file);
      zip.file(`${finalName}.txt`, mf.caption || "");
    });

    const content = await zip.generateAsync({ type: 'blob' });
    const url = URL.createObjectURL(content);
    try {
      const link = document.createElement('a');
      link.href = url;
      link.download = `lora_dataset_${new Date().getTime()}.zip`;
      link.click();
    } finally {
      // Release the blob URL once the download has been handed to the browser,
      // matching the other download helpers in this component.
      URL.revokeObjectURL(url);
    }
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    alert("Export failed: " + reason);
  } finally {
    setIsExporting(false);
  }
}, [selectedFiles, datasetPrefix]);
392
+
393
// Moves the preview modal to the next media item, wrapping from the last item
// back to the first. Does nothing when no preview is active, when there is at
// most one item, or when the active id no longer exists in the list.
const handleNextPreview = useCallback(() => {
  const files = mediaFiles || [];
  if (!activePreviewId || files.length <= 1) return;

  const currentIndex = files.findIndex(m => m.id === activePreviewId);
  // The active item may have been deleted while its id is still set; do not
  // jump to an arbitrary item in that case.
  if (currentIndex === -1) return;

  const nextIndex = (currentIndex + 1) % files.length;
  setActivePreviewId(files[nextIndex].id);
}, [activePreviewId, mediaFiles]);
399
+
400
// Moves the preview modal to the previous media item, wrapping from the first
// item back to the last. Does nothing when no preview is active, when there is
// at most one item, or when the active id no longer exists in the list.
const handlePrevPreview = useCallback(() => {
  const files = mediaFiles || [];
  if (!activePreviewId || files.length <= 1) return;

  const currentIndex = files.findIndex(m => m.id === activePreviewId);
  // Without this guard a missing id (-1) would land on index `length - 2`.
  if (currentIndex === -1) return;

  const prevIndex = (currentIndex - 1 + files.length) % files.length;
  setActivePreviewId(files[prevIndex].id);
}, [activePreviewId, mediaFiles]);
406
+
407
+ const downloadQwenSetupScript = () => {
408
+ const isWin = qwenOsType === 'windows';
409
+ const content = isWin
410
+ ? `@echo off\npython -m venv venv\ncall venv\\Scripts\\activate\npip install vllm bitsandbytes\necho Setup Complete.`
411
+ : `#!/bin/bash\npython3 -m venv venv\nsource venv/bin/activate\npip install vllm bitsandbytes\necho Setup Complete.`;
412
+ const filename = isWin ? 'setup_qwen.bat' : 'setup_qwen.sh';
413
+ const blob = new Blob([content], { type: 'text/plain' });
414
+ const url = URL.createObjectURL(blob);
415
+ const a = document.createElement('a');
416
+ a.href = url;
417
+ a.download = filename;
418
+ a.click();
419
+ URL.revokeObjectURL(url);
420
+ };
421
+
422
+ const downloadBridgeScript = () => {
423
+ const code = `import requests\nfrom flask import Flask, request, Response\nfrom flask_cors import CORS\napp = Flask(__name__)\nCORS(app)\nTARGET = "http://127.0.0.1:8188"\n@app.route('/', defaults={'path': ''}, methods=['GET','POST','PUT','DELETE','PATCH','OPTIONS'])\n@app.route('/<path:path>', methods=['GET','POST','PUT','DELETE','PATCH','OPTIONS'])\ndef proxy(path):\n url = f"{TARGET}/{path}"\n headers = {k:v for k,v in request.headers.items() if k.lower() not in ['host', 'origin', 'referer']}\n resp = requests.request(method=request.method, url=url, headers=headers, data=request.get_data(), params=request.args, stream=True)\n return Response(resp.content, resp.status_code, [(n,v) for n,v in resp.headers.items() if n.lower() not in ['content-encoding','content-length','transfer-encoding','connection']])\nif __name__ == '__main__': app.run(port=5000, host='0.0.0.0')`;
424
+ const blob = new Blob([code], { type: 'text/x-python' });
425
+ const url = URL.createObjectURL(blob);
426
+ const a = document.createElement('a');
427
+ a.href = url;
428
+ a.download = 'bridge.py';
429
+ a.click();
430
+ URL.revokeObjectURL(url);
431
+ };
432
+
433
+ // --- RENDER ---
434
+ return (
435
+ <div className="min-h-screen bg-gray-950 text-gray-100 font-sans p-4 sm:p-8">
436
+ {/* PREVIEW MODAL */}
437
+ {activePreviewId && currentPreviewItem && (
438
+ <div className="fixed inset-0 z-50 flex items-center justify-center p-4 bg-black/95 backdrop-blur-sm animate-fade-in" onClick={() => setActivePreviewId(null)}>
439
+ <div className="bg-gray-900 w-full max-w-6xl rounded-2xl border border-gray-700 overflow-hidden flex flex-col max-h-[95vh] animate-scale-up shadow-2xl relative" onClick={(e) => e.stopPropagation()}>
440
+ <button onClick={handlePrevPreview} className="absolute left-4 top-1/2 -translate-y-1/2 z-10 p-4 bg-gray-800/80 hover:bg-indigo-600 rounded-full text-white shadow-2xl transition-all border border-white/5 active:scale-90">
441
+ <svg className="w-6 h-6" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth="3" d="M15 19l-7-7 7-7"/></svg>
442
+ </button>
443
+ <button onClick={handleNextPreview} className="absolute right-4 top-1/2 -translate-y-1/2 z-10 p-4 bg-gray-800/80 hover:bg-indigo-600 rounded-full text-white shadow-2xl transition-all border border-white/5 active:scale-90">
444
+ <svg className="w-6 h-6" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth="3" d="M9 5l7 7-7 7"/></svg>
445
+ </button>
446
+ <div className="px-6 py-4 border-b border-gray-800 flex justify-between items-center bg-gray-850">
447
+ <div className="flex items-center gap-4">
448
+ <SparklesIcon className="w-5 h-5 text-indigo-400" />
449
+ <div className="flex flex-col">
450
+ <h3 className="text-xs font-black uppercase tracking-widest text-gray-400">{(mediaFiles || []).findIndex(m => m.id === activePreviewId) + 1} of {mediaFiles.length}</h3>
451
+ <h3 className="text-[11px] font-bold truncate max-w-md text-gray-500">{currentPreviewItem.file.name}</h3>
452
+ </div>
453
+ </div>
454
+ <div className="flex items-center gap-2">
455
+ <button onClick={handlePrevPreview} className="px-4 py-2 bg-gray-800 hover:bg-gray-700 rounded-xl text-[10px] font-black uppercase transition-all">Prev</button>
456
+ <button onClick={handleNextPreview} className="px-4 py-2 bg-gray-800 hover:bg-gray-700 rounded-xl text-[10px] font-black uppercase transition-all">Next</button>
457
+ <button onClick={() => setActivePreviewId(null)} className="ml-4 p-2 hover:bg-red-600/20 rounded-full transition-colors text-gray-500 hover:text-red-400"><XIcon className="w-5 h-5" /></button>
458
+ </div>
459
+ </div>
460
+ <div className="flex-grow overflow-y-auto p-6 space-y-8 bg-black/40">
461
+ <div className="grid grid-cols-1 md:grid-cols-2 gap-8 h-[450px]">
462
+ <div className="bg-black rounded-2xl border border-gray-800 flex items-center justify-center overflow-hidden relative shadow-inner">
463
+ {currentPreviewItem.file.type.startsWith('video/') ? <video src={currentPreviewItem.previewUrl} className="max-h-full" controls /> : <img src={currentPreviewItem.previewUrl} className="max-h-full object-contain" />}
464
+ <div className="absolute top-3 left-3 bg-black/70 backdrop-blur-md px-3 py-1 rounded-lg text-[10px] font-black uppercase text-white/80 border border-white/5">Original Data</div>
465
+ </div>
466
+ <div className="bg-black rounded-2xl border border-gray-800 flex items-center justify-center relative overflow-hidden shadow-inner">
467
+ {currentPreviewItem.comfyPreviewUrl ? <img src={currentPreviewItem.comfyPreviewUrl} className="max-h-full object-contain" /> : <div className="text-xs uppercase text-gray-700 tracking-widest font-black">No Preview Rendered</div>}
468
+ {currentPreviewItem.comfyStatus === 'generating' && <div className="absolute inset-0 bg-black/60 flex flex-col items-center justify-center gap-3"><LoaderIcon className="w-10 h-10 animate-spin text-orange-500" /><span className="text-xs font-black uppercase text-orange-400 tracking-widest">Rendering via ComfyUI...</span></div>}
469
+ <div className="absolute top-3 left-3 bg-orange-600/70 backdrop-blur-md px-3 py-1 rounded-lg text-[10px] font-black uppercase text-white/90 border border-white/5">ComfyUI Render</div>
470
+ </div>
471
+ </div>
472
+ <div className="space-y-6">
473
+ <textarea value={currentPreviewItem.caption} onChange={(e) => updateFile(currentPreviewItem.id, { caption: e.target.value })} className="w-full bg-gray-950 border border-gray-700 rounded-2xl p-6 text-sm h-40 outline-none focus:ring-2 focus:ring-indigo-500 transition-all shadow-inner leading-relaxed" />
474
+ <div className="flex gap-4">
475
+ <input type="text" value={currentPreviewItem.customInstructions} onChange={(e) => updateFile(currentPreviewItem.id, { customInstructions: e.target.value })} placeholder="Refine caption instructions..." className="flex-grow bg-gray-800 border border-gray-700 rounded-xl px-5 py-3 text-sm outline-none focus:ring-1 focus:ring-indigo-500 shadow-sm" />
476
+ <button onClick={() => handleGenerateCaption(currentPreviewItem.id, currentPreviewItem.customInstructions)} className="px-8 py-3 bg-green-600 hover:bg-green-500 text-white rounded-xl text-xs font-black uppercase transition-all shadow-xl active:scale-95">Re-Generate</button>
477
+ <button onClick={() => handleRefineCaptionItem(currentPreviewItem.id, currentPreviewItem.customInstructions)} className="px-8 py-3 bg-indigo-600 hover:bg-indigo-500 text-white rounded-xl text-xs font-black uppercase transition-all shadow-xl active:scale-95">Refine</button>
478
+ <button onClick={() => handleCheckQuality(currentPreviewItem.id)} className="px-8 py-3 bg-blue-600 hover:bg-blue-500 text-white rounded-xl text-xs font-black uppercase transition-all shadow-xl active:scale-95">Check Quality</button>
479
+ <button onClick={() => handleComfyPreview(currentPreviewItem.id)} className="px-8 py-3 bg-orange-600 hover:bg-orange-500 text-white rounded-xl text-xs font-black uppercase transition-all shadow-xl active:scale-95">Preview</button>
480
+ </div>
481
+ </div>
482
+ </div>
483
+ </div>
484
+ </div>
485
+ )}
486
+
487
+ <main className="max-w-6xl mx-auto space-y-8 animate-fade-in">
488
+ <section className="bg-gray-900 border border-gray-800 p-8 rounded-3xl shadow-2xl space-y-12">
489
+ <h2 className="text-3xl font-black flex items-center gap-4 uppercase tracking-tighter text-white">1. Global Settings & Actions</h2>
490
+
491
+ <div className="grid grid-cols-1 lg:grid-cols-2 gap-16">
492
+ <div className="space-y-10">
493
+ <div>
494
+ <label className="text-xs font-black text-gray-500 uppercase tracking-widest block mb-4">AI Provider</label>
495
+ <div className="flex p-1.5 bg-black rounded-2xl border border-gray-800 shadow-inner">
496
+ <button onClick={() => setApiProvider('gemini')} className={`flex-1 py-3 text-xs font-black uppercase rounded-xl transition-all ${apiProvider === 'gemini' ? 'bg-indigo-600 text-white shadow-lg' : 'text-gray-600 hover:text-gray-400'}`}>Google Gemini</button>
497
+ <button onClick={() => setApiProvider('qwen')} className={`flex-1 py-3 text-xs font-black uppercase rounded-xl transition-all ${apiProvider === 'qwen' ? 'bg-indigo-600 text-white shadow-lg' : 'text-gray-600 hover:text-gray-400'}`}>Local Qwen (GPU)</button>
498
+ </div>
499
+ </div>
500
+
501
+ {apiProvider === 'gemini' ? (
502
+ <div className="bg-indigo-500/5 border border-indigo-500/20 p-6 rounded-3xl space-y-6 animate-slide-down shadow-xl">
503
+ <div className="space-y-4">
504
+ <div className="flex justify-between items-center">
505
+ <label className="text-[10px] font-black text-indigo-400 uppercase tracking-widest">Gemini Model Version</label>
506
+ </div>
507
+ <select
508
+ value={geminiModel}
509
+ onChange={(e) => setGeminiModel(e.target.value)}
510
+ className="w-full p-3 bg-black border border-indigo-500/30 rounded-xl text-xs font-bold text-gray-300 shadow-inner focus:ring-1 focus:ring-indigo-500 outline-none"
511
+ >
512
+ {GEMINI_MODELS.map(m => <option key={m.id} value={m.id}>{m.name}</option>)}
513
+ </select>
514
+ </div>
515
+
516
+ <div className="space-y-4">
517
+ <div className="flex justify-between items-center">
518
+ <label className="text-[10px] font-black text-indigo-400 uppercase tracking-widest">Gemini API Key</label>
519
+ {geminiApiKey && <span className="flex items-center gap-1.5 text-[9px] font-black uppercase text-green-400 bg-green-400/10 px-2 py-0.5 rounded-full"><CheckCircleIcon className="w-3 h-3"/> Configured</span>}
520
+ </div>
521
+ <div className="relative group">
522
+ <input
523
+ type="password"
524
+ value={geminiApiKey}
525
+ onChange={(e) => setGeminiApiKey(e.target.value)}
526
+ placeholder="Enter your Gemini API key here..."
527
+ className="w-full py-4 px-5 bg-black border border-indigo-500/30 rounded-2xl text-xs font-mono shadow-inner focus:ring-1 focus:ring-indigo-500 outline-none hover:border-indigo-500/60 transition-all"
528
+ />
529
+ <div className="absolute right-4 top-1/2 -translate-y-1/2 pointer-events-none text-indigo-400/50 group-hover:text-indigo-400 transition-colors">
530
+ <SparklesIcon className="w-5 h-5" />
531
+ </div>
532
+ </div>
533
+ </div>
534
+ <p className="text-[10px] text-gray-500 flex items-center gap-1.5 px-1">
535
+ <AlertTriangleIcon className="w-3 h-3 text-indigo-400" />
536
+ Get an API key from
537
+ <a href="https://aistudio.google.com/app/apikey" target="_blank" rel="noopener noreferrer" className="text-indigo-400 hover:underline font-bold">Google AI Studio</a>
538
+ </p>
539
+ </div>
540
+ ) : (
541
+ <div className="bg-gray-950 p-6 rounded-3xl border border-gray-800 space-y-6 animate-slide-down shadow-xl">
542
+ <div className="flex justify-between items-center mb-2">
543
+ <label className="text-[10px] font-black text-indigo-400 uppercase tracking-widest">Local Model Configuration</label>
544
+ <div className="flex items-center gap-4">
545
+ <label className="flex items-center gap-2 cursor-pointer group">
546
+ <input type="checkbox" checked={useOfflineSnapshot} onChange={e => setUseOfflineSnapshot(e.target.checked)} className="h-4 w-4 rounded bg-gray-800 border-gray-700 text-indigo-600" />
547
+ <span className="text-[10px] font-bold text-orange-400 group-hover:text-orange-300">Use Offline Local Snapshot</span>
548
+ </label>
549
+ {!useOfflineSnapshot && (
550
+ <label className="flex items-center gap-2 cursor-pointer group">
551
+ <input type="checkbox" checked={useCustomQwenModel} onChange={e => setUseCustomQwenModel(e.target.checked)} className="h-4 w-4 rounded bg-gray-800 border-gray-700 text-indigo-600" />
552
+ <span className="text-[10px] font-bold text-gray-500 group-hover:text-gray-300">Custom Model ID</span>
553
+ </label>
554
+ )}
555
+ </div>
556
+ </div>
557
+
558
+ {useOfflineSnapshot ? (
559
+ <div className="space-y-4 animate-slide-down">
560
+ <div className="space-y-1">
561
+ <label className="text-[9px] font-black text-gray-700 uppercase">Snapshot Directory Path</label>
562
+ <input type="text" value={snapshotPath} onChange={e => setSnapshotPath(e.target.value)} placeholder="/path/to/hf_cache/.../snapshots/hash..." className="w-full p-2.5 bg-black border border-gray-800 rounded-xl text-xs font-mono shadow-inner" />
563
+ </div>
564
+ <div className="space-y-1">
565
+ <label className="text-[9px] font-black text-gray-700 uppercase">Virtual Model Name (Served Name)</label>
566
+ <input type="text" value={virtualModelName} onChange={e => setVirtualModelName(e.target.value)} placeholder="org/model-id..." className="w-full p-2.5 bg-black border border-gray-800 rounded-xl text-xs font-mono shadow-inner" />
567
+ </div>
568
+ </div>
569
+ ) : useCustomQwenModel ? (
570
+ <input type="text" value={customQwenModelId} onChange={e => setCustomQwenModelId(e.target.value)} placeholder="org/model-id..." className="w-full p-3 bg-black border border-gray-800 rounded-xl text-xs font-mono shadow-inner" />
571
+ ) : (
572
+ <select value={qwenModel} onChange={e => setQwenModel(e.target.value)} className="w-full p-3 bg-black border border-gray-800 rounded-xl text-xs font-bold text-gray-300 shadow-inner">
573
+ {QWEN_MODELS.map(m => <option key={m.id} value={m.id}>{m.name}</option>)}
574
+ </select>
575
+ )}
576
+
577
+ <div className="pt-4 border-t border-gray-800 space-y-4">
578
+ <div className="flex justify-between items-center">
579
+ <span className="text-[10px] font-black text-gray-600 uppercase">OS Type:</span>
580
+ <div className="flex gap-2">
581
+ <button onClick={() => setQwenOsType('windows')} className={`px-3 py-1 text-[9px] font-black uppercase rounded-lg transition-all ${qwenOsType === 'windows' ? 'bg-indigo-600 text-white' : 'text-gray-600 hover:text-gray-400'}`}>Windows</button>
582
+ <button onClick={() => setQwenOsType('linux')} className={`px-3 py-1 text-[9px] font-black uppercase rounded-lg transition-all ${qwenOsType === 'linux' ? 'bg-indigo-600 text-white' : 'text-gray-600 hover:text-gray-400'}`}>Linux</button>
583
+ </div>
584
+ </div>
585
+
586
+ <div className="grid grid-cols-4 gap-4">
587
+ <div className="col-span-3 space-y-1">
588
+ <label className="text-[9px] font-black text-gray-700 uppercase">Install Path</label>
589
+ <input type="text" value={qwenInstallDir} onChange={e => setQwenInstallDir(e.target.value)} className="w-full p-2.5 bg-black border border-gray-800 rounded-xl text-xs font-mono" />
590
+ </div>
591
+ <div className="space-y-1">
592
+ <label className="text-[9px] font-black text-gray-700 uppercase">Max Tokens</label>
593
+ <input type="number" value={qwenMaxTokens} onChange={e => setQwenMaxTokens(Number(e.target.value))} className="w-full p-2.5 bg-black border border-gray-800 rounded-xl text-xs text-center" />
594
+ </div>
595
+ </div>
596
+
597
+ <div className="flex flex-col gap-2">
598
+ <label className="flex items-center gap-2 cursor-pointer group">
599
+ <input type="checkbox" checked={qwen8Bit} onChange={e => setQwen8Bit(e.target.checked)} className="h-4 w-4 rounded bg-gray-800 text-indigo-600" />
600
+ <span className="text-[10px] font-bold text-gray-500 group-hover:text-gray-300">Enable 8-bit Quantization (bitsandbytes)</span>
601
+ </label>
602
+ <label className="flex items-center gap-2 cursor-pointer group">
603
+ <input type="checkbox" checked={qwenEager} onChange={e => setQwenEager(e.target.checked)} className="h-4 w-4 rounded bg-gray-950 text-indigo-600" />
604
+ <span className="text-[10px] font-bold text-gray-500 group-hover:text-gray-300">Enforce Eager Mode</span>
605
+ </label>
606
+ </div>
607
+
608
+ <button onClick={downloadQwenSetupScript} className="w-full py-3 bg-green-700 hover:bg-green-600 text-white text-[10px] font-black uppercase rounded-xl transition-all shadow-lg">Download Setup Script</button>
609
+
610
+ <div className="space-y-2">
611
+ <label className="text-[9px] font-black text-gray-700 uppercase">Local Start Command:</label>
612
+ <div className="relative group">
613
+ <div className="p-3 bg-black rounded-xl border border-gray-900 font-mono text-[10px] text-green-500/80 break-all leading-relaxed max-h-24 overflow-y-auto shadow-inner">
614
+ {qwenStartCommand}
615
+ </div>
616
+ <button onClick={() => navigator.clipboard.writeText(qwenStartCommand)} className="absolute top-2 right-2 p-1.5 bg-gray-800 hover:bg-gray-700 text-gray-400 rounded-lg opacity-0 group-hover:opacity-100 transition-all"><CopyIcon className="w-3.5 h-3.5"/></button>
617
+ </div>
618
+ </div>
619
+
620
+ <div className="space-y-1">
621
+ <label className="text-[10px] font-black text-indigo-400 uppercase tracking-widest">Endpoint URL (Tunnel or Local)</label>
622
+ <input type="text" value={qwenEndpoint} onChange={e => setQwenEndpoint(e.target.value)} placeholder="http://localhost:8000/v1" className="w-full p-3 bg-black border border-gray-800 rounded-xl text-xs font-mono shadow-inner focus:ring-1 focus:ring-indigo-500 outline-none" />
623
+ </div>
624
+ </div>
625
+ </div>
626
+ )}
627
+
628
+ <div className="space-y-6">
629
+ <div className="grid grid-cols-1 sm:grid-cols-2 gap-4">
630
+ <div className="space-y-2">
631
+ <label className="text-[10px] font-black text-gray-500 uppercase tracking-widest">Trigger Word</label>
632
+ <input type="text" value={triggerWord} onChange={e => setTriggerWord(e.target.value)} className="w-full p-3 bg-gray-950 border border-gray-800 rounded-2xl text-sm font-bold shadow-inner" placeholder="MyStyle" />
633
+ </div>
634
+ <div className="space-y-2">
635
+ <label className="text-[10px] font-black text-gray-500 uppercase tracking-widest">File Prefix</label>
636
+ <input type="text" value={datasetPrefix} onChange={e => setDatasetPrefix(e.target.value)} className="w-full p-3 bg-gray-950 border border-gray-800 rounded-2xl text-sm font-bold shadow-inner" placeholder="item" />
637
+ </div>
638
+ </div>
639
+ <div className="bg-gray-800/40 p-5 rounded-3xl border border-gray-800 space-y-4 shadow-xl">
640
+ <label className="flex items-center gap-3 cursor-pointer group">
641
+ <input type="checkbox" checked={isCharacterTaggingEnabled} onChange={(e) => setIsCharacterTaggingEnabled(e.target.checked)} className="h-6 w-6 rounded-lg bg-gray-900 border-gray-700 text-indigo-600 transition-all shadow-sm" />
642
+ <span className="text-xs font-black text-gray-500 uppercase tracking-wider group-hover:text-gray-300 transition-colors">Character Tagging</span>
643
+ </label>
644
+ {isCharacterTaggingEnabled && (
645
+ <div className="animate-slide-down">
646
+ <input type="text" value={characterShowName} onChange={(e) => setCharacterShowName(e.target.value)} placeholder="Enter show/series name..." className="w-full p-3 bg-gray-950 border border-gray-700 rounded-xl text-xs font-medium focus:ring-1 focus:ring-indigo-500 outline-none transition-all shadow-inner" />
647
+ </div>
648
+ )}
649
+ </div>
650
+ </div>
651
+ </div>
652
+
653
+ <div className="space-y-10">
654
+ <div className="space-y-8">
655
+ <div className="space-y-3">
656
+ <label className="text-xs font-black text-gray-500 uppercase tracking-widest block">System Instructions & Prompting</label>
657
+ <textarea value={bulkGenerationInstructions} onChange={(e) => setBulkGenerationInstructions(e.target.value)} className="w-full p-5 bg-gray-950 border border-gray-800 rounded-3xl text-[13px] h-40 leading-relaxed resize-none outline-none focus:ring-2 focus:ring-indigo-500 shadow-inner" placeholder="Enter global captioning rules..." />
658
+ </div>
659
+ <div className="space-y-3">
660
+ <label className="text-xs font-black text-indigo-400 uppercase tracking-widest block">Refinement Instructions</label>
661
+ <textarea value={bulkRefinementInstructions} onChange={(e) => setBulkRefinementInstructions(e.target.value)} className="w-full p-5 bg-gray-950 border border-indigo-500/20 rounded-3xl text-[13px] h-40 leading-relaxed resize-none outline-none focus:ring-2 focus:ring-indigo-500 shadow-inner" placeholder="Enter instructions for refining existing captions..." />
662
+ </div>
663
+ </div>
664
+
665
+ <div className="flex flex-col gap-6 pt-4 border-t border-gray-800">
666
+ <div className="flex flex-wrap gap-x-8 gap-y-4">
667
+ <label className="flex items-center gap-3 cursor-pointer group">
668
+ <input type="checkbox" checked={autofitTextareas} onChange={(e) => setAutofitTextareas(e.target.checked)} className="h-5 w-5 rounded-md bg-gray-900 border-gray-700 text-indigo-500 shadow-inner" />
669
+ <span className="text-xs font-bold text-gray-500 uppercase group-hover:text-gray-300 transition-colors">Autofit Textboxes</span>
670
+ </label>
671
+ <label className="flex items-center gap-3 cursor-pointer group">
672
+ <input type="checkbox" checked={showSideBySidePreview} onChange={(e) => setShowSideBySidePreview(e.target.checked)} className="h-5 w-5 rounded-md bg-gray-900 border-gray-700 text-indigo-500 shadow-inner" />
673
+ <span className="text-xs font-bold text-gray-500 uppercase group-hover:text-gray-300 transition-colors">Side-by-Side Comparison</span>
674
+ </label>
675
+ <label className="flex items-center gap-3 cursor-pointer group">
676
+ <input type="checkbox" checked={isComfyEnabled} onChange={(e) => setIsComfyEnabled(e.target.checked)} className="h-5 w-5 rounded-md bg-gray-900 border-gray-700 text-orange-500 shadow-inner" />
677
+ <span className="text-xs font-black text-orange-500 uppercase tracking-widest group-hover:text-orange-400 transition-colors">Enable ComfyUI Previews</span>
678
+ </label>
679
+ </div>
680
+
681
+ <div className="bg-indigo-600/5 border border-indigo-600/20 p-6 rounded-3xl space-y-4">
682
+ <div className="flex justify-between items-center">
683
+ <label className="flex items-center gap-3 cursor-pointer group">
684
+ <input type="checkbox" checked={useRequestQueue} onChange={(e) => setUseRequestQueue(e.target.checked)} className="h-5 w-5 rounded bg-gray-900 border-gray-700 text-indigo-500" />
685
+ <span className="text-xs font-black text-indigo-400 uppercase tracking-widest group-hover:text-indigo-300 transition-colors">Enable Request Queue</span>
686
+ </label>
687
+ {useRequestQueue && (
688
+ <div className="flex items-center gap-3">
689
+ <label className="text-[10px] font-black text-gray-600 uppercase">Concurrent Tasks</label>
690
+ <input type="number" min="1" max="10" value={concurrentTasks} onChange={(e) => setConcurrentTasks(Number(e.target.value))} className="w-16 p-1 bg-black border border-gray-800 rounded text-center text-xs font-bold" />
691
+ </div>
692
+ )}
693
+ </div>
694
+ <p className="text-[10px] text-gray-600 italic">Recommended for Gemini Free Tier or Local GPU to prevent rate limits or OOM errors.</p>
695
+ </div>
696
+
697
+ {isComfyEnabled && (
698
+ <div className="bg-orange-600/5 border border-orange-600/20 p-6 rounded-3xl space-y-6 animate-slide-down shadow-xl">
699
+ <div className="grid grid-cols-1 md:grid-cols-2 gap-6">
700
+ <div className="space-y-2">
701
+ <label className="text-[10px] font-black text-gray-600 uppercase">Endpoint</label>
702
+ <input type="text" value={comfyUrl} onChange={(e) => setComfyUrl(e.target.value)} placeholder="http://127.0.0.1:8188" className="w-full p-3 bg-black border border-gray-800 rounded-xl text-xs font-mono shadow-inner" />
703
+ </div>
704
+ <div className="space-y-2">
705
+ <label className="text-[10px] font-black text-gray-600 uppercase">Workflow ({comfyWorkflowName})</label>
706
+ <div className="flex gap-2">
707
+ <button onClick={() => document.getElementById('wf-up')?.click()} className="flex-1 py-2.5 bg-orange-600 hover:bg-orange-500 text-white rounded-xl shadow-lg transition-all active:scale-95 text-[10px] font-black uppercase tracking-widest">Load JSON</button>
708
+ <button onClick={handleClearWorkflow} className="px-4 bg-gray-800 hover:bg-gray-700 text-gray-400 rounded-xl transition-all active:scale-95"><TrashIcon className="w-4 h-4"/></button>
709
+ <input id="wf-up" type="file" accept=".json" onChange={(e) => {
710
+ const f = e.target.files?.[0];
711
+ if (f) {
712
+ const r = new FileReader();
713
+ r.onload = (ev) => {
714
+ try {
715
+ setComfyWorkflow(JSON.parse(ev.target?.result as string));
716
+ setComfyWorkflowName(f.name);
717
+ } catch { alert("Invalid Workflow JSON"); }
718
+ };
719
+ r.readAsText(f);
720
+ }
721
+ }} className="hidden" />
722
+ </div>
723
+ </div>
724
+ <div className="space-y-2">
725
+ <label className="text-[10px] font-black text-gray-600 uppercase">Default Seed (-1 for random)</label>
726
+ <input type="number" value={comfySeed} onChange={(e) => setComfySeed(Number(e.target.value))} className="w-full p-3 bg-black border border-gray-800 rounded-xl text-xs shadow-inner" />
727
+ </div>
728
+ <div className="space-y-2">
729
+ <label className="text-[10px] font-black text-gray-600 uppercase">Steps</label>
730
+ <input type="number" value={comfySteps} onChange={(e) => setComfySteps(Number(e.target.value))} className="w-full p-3 bg-black border border-gray-800 rounded-xl text-xs shadow-inner" />
731
+ </div>
732
+ </div>
733
+
734
+ {/* Secure Bridge Sub-section */}
735
+ <div className="pt-6 border-t border-orange-600/10 space-y-6">
736
+ <div className="flex justify-between items-center">
737
+ <h3 className="text-[11px] font-black text-orange-400 uppercase tracking-widest">Secure Bridge (for HTTPS/Remote access)</h3>
738
+ <label className="flex items-center gap-3 cursor-pointer group">
739
+ <input type="checkbox" checked={useSecureBridge} onChange={(e) => setUseSecureBridge(e.target.checked)} className="h-5 w-5 rounded bg-gray-900 border-gray-700 text-orange-500" />
740
+ <span className="text-[10px] font-bold text-gray-500 group-hover:text-gray-300 transition-colors">Enable Bridge Proxy</span>
741
+ </label>
742
+ </div>
743
+
744
+ {useSecureBridge && (
745
+ <div className="space-y-6 animate-slide-down">
746
+ <div className="grid grid-cols-1 md:grid-cols-2 gap-6">
747
+ <div className="space-y-2">
748
+ <label className="text-[9px] font-black text-gray-600 uppercase">Bridge OS</label>
749
+ <div className="flex gap-2">
750
+ <button onClick={() => setBridgeOsType('windows')} className={`flex-1 py-2 text-[10px] font-black uppercase rounded-lg transition-all ${bridgeOsType === 'windows' ? 'bg-orange-600 text-white' : 'bg-gray-800 text-gray-500'}`}>Windows</button>
751
+ <button onClick={() => setBridgeOsType('linux')} className={`flex-1 py-2 text-[10px] font-black uppercase rounded-lg transition-all ${bridgeOsType === 'linux' ? 'bg-orange-600 text-white' : 'bg-gray-800 text-gray-500'}`}>Linux</button>
752
+ </div>
753
+ </div>
754
+ <div className="space-y-2">
755
+ <label className="text-[9px] font-black text-gray-600 uppercase">Install Path</label>
756
+ <input type="text" value={bridgeInstallPath} onChange={(e) => setBridgeInstallPath(e.target.value)} className="w-full p-3 bg-black border border-gray-800 rounded-xl text-xs font-mono shadow-inner" />
757
+ </div>
758
+ </div>
759
+
760
+ <div className="space-y-4">
761
+ <label className="flex items-center gap-3 cursor-pointer group">
762
+ <input type="checkbox" checked={isFirstTimeBridge} onChange={(e) => setIsFirstTimeBridge(e.target.checked)} className="h-4 w-4 rounded bg-gray-950 border-gray-800 text-orange-500" />
763
+ <span className="text-[10px] font-bold text-gray-500 group-hover:text-gray-300">First-time Setup (Include VENV & Pip Install)</span>
764
+ </label>
765
+ <div className="flex gap-4">
766
+ <button onClick={downloadBridgeScript} className="flex-1 py-3 bg-orange-700 hover:bg-orange-600 text-white text-[10px] font-black uppercase rounded-xl transition-all shadow-lg">Download Bridge.py</button>
767
+ </div>
768
+ </div>
769
+
770
+ <div className="space-y-2">
771
+ <label className="text-[9px] font-black text-gray-700 uppercase tracking-widest">Start Command:</label>
772
+ <div className="relative group">
773
+ <div className="p-3 bg-black rounded-xl border border-gray-900 font-mono text-[10px] text-green-500/80 break-all leading-relaxed shadow-inner">
774
+ {bridgeStartCommand}
775
+ </div>
776
+ <button onClick={() => navigator.clipboard.writeText(bridgeStartCommand)} className="absolute top-2 right-2 p-1.5 bg-gray-800 hover:bg-gray-700 text-gray-400 rounded-lg opacity-0 group-hover:opacity-100 transition-all"><CopyIcon className="w-3.5 h-3.5"/></button>
777
+ </div>
778
+ <p className="text-[9px] text-gray-600 italic">The bridge will proxy requests from this HTTPS app to your local HTTP ComfyUI server.</p>
779
+ </div>
780
+ </div>
781
+ )}
782
+ </div>
783
+ </div>
784
+ )}
785
+ </div>
786
+ </div>
787
+ </div>
788
+
789
+ <div className="border-t border-gray-800 pt-10 flex flex-col gap-6">
790
+ <div className="flex flex-wrap gap-4 justify-end">
791
+ <button
792
+ onClick={handleDeleteSelected}
793
+ disabled={selectedFiles.length === 0}
794
+ className="px-6 py-4 bg-red-600/20 hover:bg-red-600/30 border border-red-600/30 rounded-2xl text-[11px] font-black uppercase text-red-400 flex items-center gap-3 transition-all active:scale-95 shadow-lg disabled:opacity-20 disabled:grayscale"
795
+ >
796
+ <TrashIcon className="w-5 h-5"/> Delete Selected ({selectedFiles.length})
797
+ </button>
798
+ <button onClick={handleStopTasks} className="px-6 py-4 bg-orange-600/20 hover:bg-orange-600/40 border border-orange-600/30 rounded-2xl text-[11px] font-black uppercase text-orange-400 flex items-center gap-3 transition-all active:scale-95 shadow-lg"><StopIcon className="w-5 h-5"/> Stop Tasks</button>
799
+
800
+ <button onClick={handleBulkQualityCheck} disabled={selectedFiles.length === 0 || !hasValidConfig || isQueueRunning} className="px-6 py-4 bg-blue-600 hover:bg-blue-500 text-white rounded-2xl text-[11px] font-black uppercase flex items-center gap-4 transition-all shadow-xl active:scale-95 disabled:opacity-40">
801
+ <CheckCircleIcon className="w-5 h-5" /> Check Quality Selected ({selectedFiles.length})
802
+ </button>
803
+
804
+ <button onClick={handleBulkGenerate} disabled={selectedFiles.length === 0 || !hasValidConfig || isQueueRunning} className="px-10 py-4 bg-green-600 hover:bg-green-500 text-white rounded-2xl text-xs font-black uppercase flex items-center gap-4 transition-all shadow-2xl shadow-green-900/30 active:scale-95 disabled:opacity-40">
805
+ <SparklesIcon className="w-6 h-6" /> Generate Selected ({selectedFiles.length})
806
+ </button>
807
+
808
+ <button onClick={handleBulkRefine} disabled={selectedFiles.length === 0 || !hasValidConfig || isQueueRunning} className="px-10 py-4 bg-indigo-600 hover:bg-indigo-500 text-white rounded-2xl text-xs font-black uppercase flex items-center gap-4 transition-all shadow-xl active:scale-95 disabled:opacity-40">
809
+ <WandIcon className="w-6 h-6" /> Refine Selected ({selectedFiles.length})
810
+ </button>
811
+ </div>
812
+ <div className="flex flex-wrap gap-4 justify-end">
813
+ {isComfyEnabled && (
814
+ <button onClick={handleBulkPreview} disabled={selectedFiles.length === 0} className="px-10 py-4 bg-orange-600 hover:bg-orange-500 text-white rounded-2xl text-xs font-black uppercase flex items-center gap-4 transition-all shadow-xl shadow-orange-900/20 active:scale-95 disabled:opacity-40">
815
+ <WandIcon className="w-6 h-6" /> Preview Selected ({selectedFiles.length})
816
+ </button>
817
+ )}
818
+ <button onClick={handleExportDataset} disabled={selectedFiles.length === 0 || isExporting} className="w-full sm:w-auto px-16 py-5 bg-indigo-700 hover:bg-indigo-600 text-white rounded-2xl text-xs font-black uppercase flex items-center justify-center gap-4 transition-all shadow-2xl active:scale-95 disabled:opacity-40">
819
+ {isExporting ? <LoaderIcon className="w-6 h-6 animate-spin" /> : <DownloadIcon className="w-6 h-6" />}
820
+ {isExporting ? 'Packaging ZIP...' : 'Download Finished Dataset'}
821
+ </button>
822
+ </div>
823
+ </div>
824
+ </section>
825
+
826
+ <section className="bg-gray-900 border border-gray-800 p-8 rounded-3xl shadow-xl overflow-hidden relative">
827
+ <div className="absolute top-0 right-0 p-8 opacity-10 pointer-events-none"><UploadCloudIcon className="w-32 h-32" /></div>
828
+ <h2 className="text-xl font-black mb-6 uppercase tracking-widest text-gray-400">2. Upload Source Media</h2>
829
+ <FileUploader onFilesAdded={handleFilesAdded} />
830
+ </section>
831
+
832
+ <section className="space-y-8 animate-slide-up min-h-[400px]">
833
+ {mediaFiles && mediaFiles.length > 0 ? (
834
+ <>
835
+ <div className="flex justify-between items-center bg-gray-900/80 backdrop-blur-2xl p-6 rounded-3xl border border-gray-800 sticky top-4 z-40 shadow-[0_20px_50px_-10px_rgba(0,0,0,0.5)]">
836
+ <div className="flex items-center gap-4">
837
+ <div className="h-10 w-1.5 bg-indigo-500 rounded-full shadow-[0_0_15px_rgba(99,102,241,0.5)]"></div>
838
+ <div className="flex flex-col">
839
+ <h2 className="text-2xl font-black text-white uppercase tracking-tighter leading-none">3. Data Curation Workspace</h2>
840
+ <p className="text-[10px] font-black text-gray-600 uppercase tracking-widest mt-1">Ready for Parallel Processing ({mediaFiles.length} Loaded)</p>
841
+ </div>
842
+ </div>
843
+ <div className="flex items-center gap-6">
844
+ <div className="flex items-center gap-3 bg-black px-6 py-3 rounded-2xl border border-gray-800 shadow-inner group active:scale-95 transition-all">
845
+ <input type="checkbox" id="sel-all" className="h-6 w-6 rounded-lg bg-gray-900 border-gray-700 text-indigo-600 transition-all cursor-pointer shadow-sm" checked={mediaFiles.length > 0 && mediaFiles.every(f => f.isSelected)} onChange={(e) => setMediaFiles(prev => (prev || []).map(mf => ({ ...mf, isSelected: e.target.checked })))} />
846
+ <label htmlFor="sel-all" className="text-xs font-black text-gray-500 cursor-pointer group-hover:text-gray-300 transition-colors uppercase tracking-widest">Select All Items</label>
847
+ </div>
848
+ </div>
849
+ </div>
850
+ <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-12">
851
+ {mediaFiles.map(item => (
852
+ <MediaItem
853
+ key={item.id}
854
+ item={item}
855
+ autofit={autofitTextareas}
856
+ isApiKeySet={hasValidConfig}
857
+ isComfyEnabled={isComfyEnabled}
858
+ showSideBySidePreview={showSideBySidePreview}
859
+ onGenerate={handleGenerateCaption}
860
+ onCheckQuality={handleCheckQuality}
861
+ onPreview={handleComfyPreview}
862
+ onCaptionChange={(id, cap) => updateFile(id, { caption: cap })}
863
+ onCustomInstructionsChange={(id, ins) => updateFile(id, { customInstructions: ins })}
864
+ onSelectionChange={(id, sel) => updateFile(id, { isSelected: sel })}
865
+ onOpenPreviewModal={setActivePreviewId}
866
+ />
867
+ ))}
868
+ </div>
869
+ </>
870
+ ) : (
871
+ <div className="flex flex-col items-center justify-center py-32 bg-gray-900/50 rounded-3xl border-2 border-dashed border-gray-800 text-gray-500 animate-pulse">
872
+ <UploadCloudIcon className="w-16 h-16 mb-6 opacity-20" />
873
+ <h3 className="text-lg font-black uppercase tracking-widest text-gray-700">No items uploaded yet</h3>
874
+ <p className="text-xs mt-2 uppercase tracking-tight text-gray-600">Start by dropping files into the upload zone above</p>
875
+ </div>
876
+ )}
877
+ </section>
878
+ </main>
879
+ </div>
880
+ );
881
+ };
882
+
883
+ export default App;
Dockerfile.txt ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM node:18-alpine
2
+
3
+ WORKDIR /app
4
+
5
+ # Copy package.json first: npm install needs it, and this keeps the dependency layer cached when only source files change
6
+ COPY package.json ./
7
+
8
+ # Install dependencies
9
+ RUN npm install
10
+
11
+ # Copy the rest of the application code
12
+ COPY . .
13
+
14
+ # Build the React application
15
+ # This will use the API_KEY environment variable if provided during build
16
+ RUN npm run build
17
+
18
+ # Expose port 7860 (Required by Hugging Face Spaces)
19
+ EXPOSE 7860
20
+
21
+ # Start the Vite preview server
22
+ CMD ["npm", "run", "preview"]
README.md CHANGED
@@ -1,10 +1,127 @@
 
1
  ---
2
- title: Loracaptionertaz
3
- emoji: 🏃
4
- colorFrom: green
5
- colorTo: pink
6
  sdk: docker
7
- pinned: false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
  ---
3
+ title: LoRA Caption Assistant
4
+ emoji: 🖼️
5
+ colorFrom: gray
6
+ colorTo: indigo
7
  sdk: docker
8
+ app_port: 7860
9
+ ---
10
+
11
+ # LoRA Caption Assistant
12
+
13
+ An AI-powered web application designed to assist in generating high-quality, detailed captions for image and video datasets. This tool is specifically tailored for training LoRA (Low-Rank Adaptation) models, utilizing Google's Gemini API or a Local Qwen Model (via vLLM) to automate the captioning process.
14
+
15
+ ## Features
16
+
17
+ * **Automated Captioning**: Generates detailed, objective descriptions using Gemini 2.5 Pro or local Qwen-VL.
18
+ * **LoRA Optimized**: Automatic trigger word insertion and style-agnostic descriptions.
19
+ * **Multi-Modal**: Supports both image and video inputs.
20
+ * **Character Tagging**: Optional automatic identification and tagging of specific characters.
21
+ * **Quality Assurance**: AI-powered scoring system to evaluate caption quality (1-5 scale).
22
+ * **Batch Processing**: Robust queue system with rate limiting (RPM) and batch sizes.
23
+ * **Export**: Downloads the dataset (media + text files) as a ZIP file.
24
+
25
+ ---
26
+
27
+ ## 🚀 Deployment on Hugging Face Spaces
28
+
29
+ This is the recommended way to run the application if you don't have a GPU.
30
+
31
+ ### Step 1: Create a Space
32
+ 1. Go to [Hugging Face Spaces](https://huggingface.co/spaces).
33
+ 2. Click **Create new Space**.
34
+ 3. Enter a name (e.g., `lora-caption-assistant`).
35
+ 4. Select **Docker** as the SDK.
36
+ 5. Choose the "Blank" Docker template and set the Space visibility (e.g., "Public").
37
+ 6. Click **Create Space**.
38
+
39
+ ### Step 2: Upload Files
40
+ Upload the contents of this repository to your Space. Ensure the following files are in the **root** directory:
41
+ * `Dockerfile` (Critical: the app will fail without this; the file must be named exactly `Dockerfile`, with no extension such as `.txt`)
42
+ * `package.json`
43
+ * `vite.config.ts`
44
+ * `index.html`
45
+ * `src/` folder (containing `App.tsx`, etc.)
46
+
47
+ ### Step 3: Configure API Key (For Gemini)
48
+ 1. In your Space, go to **Settings**.
49
+ 2. Scroll to **Variables and secrets**.
50
+ 3. Click **New secret**.
51
+ 4. **Name**: `API_KEY`
52
+ 5. **Value**: Your Google Gemini API Key.
53
+
54
  ---
55
 
56
+ ## 🤖 Local Qwen Setup Guide
57
+
58
+ If you have a powerful NVIDIA GPU (12GB+ VRAM recommended), you can run the captioning model **locally for free** and connect this web app to it.
59
+
60
+ ### Prerequisites
61
+ * **OS**: Windows or Linux
62
+ * **GPU**: NVIDIA GPU (CUDA support)
63
+ * **Software**: Python 3.10+ and CUDA Toolkit installed.
64
+
65
+ ### Step 1: Get the Script
66
+ 1. Open the LoRA Caption Assistant Web App.
67
+ 2. Under **AI Provider**, select **Local Qwen (GPU)**.
68
+ 3. Select your desired model (e.g., `Qwen 2.5 VL 7B`).
69
+ 4. Set your desired install folder path.
70
+ 5. Click **Download Setup Script**.
71
+
72
+ ### Step 2: Run the Server
73
+ 1. Locate the downloaded `.bat` (Windows) or `.sh` (Linux) file.
74
+ 2. Run it.
75
+ 3. The script will:
76
+ * Create a Python virtual environment.
77
+ * Install `vllm`.
78
+ * Download the selected Qwen model from Hugging Face.
79
+ * Start an OpenAI-compatible API server on port 8000.
80
+
81
+ ### Step 3: Connect to the App
82
+
83
+ **Scenario A: Running App Locally (localhost)**
84
+ * If you are running this web app on your own computer (`npm run dev`), simply set the Endpoint in the app to: `http://localhost:8000/v1`
85
+
86
+ **Scenario B: Running App on Hugging Face (HTTPS)**
87
+ * If you are accessing the web app via Hugging Face Spaces, you **cannot** connect to `localhost` directly due to browser security (Mixed Content Blocking).
88
+ * You must create a secure tunnel.
89
+
90
+ **How to Tunnel:**
91
+ 1. **Cloudflare Tunnel (Easiest)**:
92
+ * Download `cloudflared`.
93
+ * Run: `cloudflared tunnel --url http://localhost:8000`
94
+ * Copy the URL ending in `.trycloudflare.com`.
95
+ 2. **Paste the URL**:
96
+ * Paste this secure URL into the **Endpoint URL (Tunnel or Local)** field in the Web App.
97
+ * Add `/v1` to the end (e.g., `https://example.trycloudflare.com/v1`).
98
+
99
+ ---
100
+
101
+ ## 💻 Local Development (Web App)
102
+
103
+ ### Prerequisites
104
+ * Node.js (v18+)
105
+ * npm
106
+
107
+ ### Installation
108
+ 1. Clone the repo:
109
+ ```bash
110
+ git clone <your-repo-url>
111
+ cd lora-caption-assistant
112
+ ```
113
+
114
+ 2. Install dependencies:
115
+ ```bash
116
+ npm install
117
+ ```
118
+
119
+ 3. Run the app:
120
+ ```bash
121
+ npm run dev
122
+ ```
123
+ Open `http://localhost:5173` in your browser.
124
+
125
+ ## License
126
+
127
+ MIT
components/FileUploader.tsx ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import React, { useState, useCallback, useRef } from 'react';
3
+ import { UploadCloudIcon } from './Icons';
4
+
5
+ interface FileUploaderProps {
6
+ onFilesAdded: (files: File[]) => void;
7
+ }
8
+
9
/**
 * Drop zone that accepts files either by drag-and-drop or through the native
 * file picker (opened by clicking anywhere on the zone).
 *
 * Every non-empty batch of files is handed to `onFilesAdded`; the component
 * keeps no file state of its own, only the "is something being dragged over
 * me" highlight flag.
 */
const FileUploader: React.FC<FileUploaderProps> = ({ onFilesAdded }) => {
  const [isDragging, setIsDragging] = useState(false);
  const fileInputRef = useRef<HTMLInputElement>(null);

  const handleDragEnter = useCallback((e: React.DragEvent<HTMLDivElement>) => {
    e.preventDefault();
    e.stopPropagation();
    setIsDragging(true);
  }, []);

  const handleDragLeave = useCallback((e: React.DragEvent<HTMLDivElement>) => {
    e.preventDefault();
    e.stopPropagation();
    // `dragleave` also fires when the pointer crosses onto a child element.
    // Only drop the highlight when the pointer has really left the zone.
    const nextTarget = e.relatedTarget;
    if (nextTarget instanceof Node && e.currentTarget.contains(nextTarget)) {
      return;
    }
    setIsDragging(false);
  }, []);

  // Required: without preventDefault on dragover the browser never fires `drop`.
  const handleDragOver = useCallback((e: React.DragEvent<HTMLDivElement>) => {
    e.preventDefault();
    e.stopPropagation();
  }, []);

  const handleDrop = useCallback((e: React.DragEvent<HTMLDivElement>) => {
    e.preventDefault();
    e.stopPropagation();
    setIsDragging(false);
    const files = Array.from(e.dataTransfer.files);
    if (files.length > 0) {
      onFilesAdded(files);
    }
  }, [onFilesAdded]);

  const handleFileSelect = (e: React.ChangeEvent<HTMLInputElement>) => {
    // Snapshot the FileList first: clearing the input below empties it.
    const files = Array.from(e.target.files ?? []);
    // Reset the input so that choosing the very same file(s) again still
    // triggers a `change` event.
    e.target.value = '';
    if (files.length > 0) {
      onFilesAdded(files);
    }
  };

  const openFileDialog = () => {
    fileInputRef.current?.click();
  };

  return (
    <div
      className={`relative border-2 border-dashed rounded-lg p-12 text-center transition-colors duration-200 ease-in-out ${
        isDragging ? 'border-indigo-400 bg-gray-800' : 'border-gray-600 hover:border-indigo-500'
      }`}
      onDragEnter={handleDragEnter}
      onDragLeave={handleDragLeave}
      onDragOver={handleDragOver}
      onDrop={handleDrop}
      onClick={openFileDialog}
    >
      <input
        ref={fileInputRef}
        type="file"
        multiple
        accept="image/*,video/*,.txt"
        className="hidden"
        onChange={handleFileSelect}
      />
      <div className="flex flex-col items-center justify-center space-y-4">
        <UploadCloudIcon className="w-12 h-12 text-gray-400" />
        <p className="text-gray-400">
          <span className="font-semibold text-indigo-400">Click to upload</span> or drag and drop
        </p>
        <p className="text-xs text-gray-500">Upload media and optional .txt captions (must have matching filenames)</p>
      </div>
    </div>
  );
};
80
+
81
+ export default FileUploader;
components/Icons.tsx ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import React from 'react';
3
+
4
/** Props accepted by every icon: anything a plain `<svg>` element accepts. */
type IconProps = React.SVGProps<SVGSVGElement>;

/**
 * Attributes shared by all icons in this file.
 *
 * They are spread AFTER the caller's props, so (as before) a caller cannot
 * override them; sizing and colour are controlled through `className`.
 */
const ICON_BASE_ATTRS = {
  xmlns: "http://www.w3.org/2000/svg",
  width: "24",
  height: "24",
  viewBox: "0 0 24 24",
  fill: "none",
  stroke: "currentColor",
  strokeWidth: "2",
  strokeLinecap: "round",
  strokeLinejoin: "round",
} as const;

/** Cloud with an upward arrow. */
export const UploadCloudIcon = (props: IconProps) => (
  <svg {...props} {...ICON_BASE_ATTRS}>
    <path d="M4 14.899A7 7 0 1 1 15.71 8h1.79a4.5 4.5 0 0 1 2.5 8.242" />
    <path d="M12 12v9" />
    <path d="m16 16-4-4-4 4" />
  </svg>
);

/** Four-pointed star with two small plus marks. */
export const SparklesIcon = (props: IconProps) => (
  <svg {...props} {...ICON_BASE_ATTRS}>
    <path d="m12 3-1.912 5.813a2 2 0 0 1-1.275 1.275L3 12l5.813 1.912a2 2 0 0 1 1.275 1.275L12 21l1.912-5.813a2 2 0 0 1 1.275-1.275L21 12l-5.813-1.912a2 2 0 0 1-1.275-1.275L12 3Z" />
    <path d="M5 3v4" />
    <path d="M19 17v4" />
    <path d="M3 5h4" />
    <path d="M17 19h4" />
  </svg>
);

/** Downward arrow into a tray. */
export const DownloadIcon = (props: IconProps) => (
  <svg {...props} {...ICON_BASE_ATTRS}>
    <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4" />
    <polyline points="7 10 12 15 17 10" />
    <line x1="12" x2="12" y1="15" y2="3" />
  </svg>
);

/** Eight radial spokes; callers add `animate-spin` to turn it into a spinner. */
export const LoaderIcon = (props: IconProps) => (
  <svg {...props} {...ICON_BASE_ATTRS}>
    <line x1="12" y1="2" x2="12" y2="6" />
    <line x1="12" y1="18" x2="12" y2="22" />
    <line x1="4.93" y1="4.93" x2="7.76" y2="7.76" />
    <line x1="16.24" y1="16.24" x2="19.07" y2="19.07" />
    <line x1="2" y1="12" x2="6" y2="12" />
    <line x1="18" y1="12" x2="22" y2="12" />
    <line x1="4.93" y1="19.07" x2="7.76" y2="16.24" />
    <line x1="16.24" y1="7.76" x2="19.07" y2="4.93" />
  </svg>
);

/** Magic wand with sparks around its tip. */
export const WandIcon = (props: IconProps) => (
  <svg {...props} {...ICON_BASE_ATTRS}>
    <path d="M15 4V2" />
    <path d="M15 16v-2" />
    <path d="M8 9h2" />
    <path d="M20 9h2" />
    <path d="M17.8 11.8 19 13" />
    <path d="M15 9h0" />
    <path d="M17.8 6.2 19 5" />
    <path d="m3 21 9-9" />
    <path d="M12.2 6.2 11 5" />
  </svg>
);

/** Check mark inside an open circle. */
export const CheckCircleIcon = (props: IconProps) => (
  <svg {...props} {...ICON_BASE_ATTRS}>
    <path d="M22 11.08V12a10 10 0 1 1-5.93-9.14" />
    <polyline points="22 4 12 14.01 9 11.01" />
  </svg>
);

/** Trash can. */
export const TrashIcon = (props: IconProps) => (
  <svg {...props} {...ICON_BASE_ATTRS}>
    <path d="M3 6h18" />
    <path d="M19 6v14a2 2 0 0 1-2 2H7a2 2 0 0 1-2-2V6m3 0V4a2 2 0 0 1 2-2h4a2 2 0 0 1 2 2v2" />
  </svg>
);

/** Diagonal cross. */
export const XIcon = (props: IconProps) => (
  <svg {...props} {...ICON_BASE_ATTRS}>
    <path d="M18 6L6 18" />
    <path d="M6 6l12 12" />
  </svg>
);

/** Warning triangle with an exclamation mark. */
export const AlertTriangleIcon = (props: IconProps) => (
  <svg {...props} {...ICON_BASE_ATTRS}>
    <path d="m21.73 18-8-14a2 2 0 0 0-3.48 0l-8 14A2 2 0 0 0 4 21h16a2 2 0 0 0 1.73-3Z" />
    <path d="M12 9v4" />
    <path d="M12 17h.01" />
  </svg>
);

/** Two overlapping rounded rectangles. */
export const CopyIcon = (props: IconProps) => (
  <svg {...props} {...ICON_BASE_ATTRS}>
    <rect width="14" height="14" x="8" y="8" rx="2" ry="2" />
    <path d="M4 16c-1.1 0-2-.9-2-2V4c0-1.1.9-2 2-2h10c1.1 0 2 .9 2 2" />
  </svg>
);

/** Plain square. */
export const StopIcon = (props: IconProps) => (
  <svg {...props} {...ICON_BASE_ATTRS}>
    <rect x="6" y="6" width="12" height="12" />
  </svg>
);
components/MediaItem.tsx ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React from 'react';
2
+ import type { MediaFile } from '../types';
3
+ import { GenerationStatus } from '../types';
4
+ import { SparklesIcon, LoaderIcon, WandIcon, CheckCircleIcon } from './Icons';
5
+
6
+ interface MediaItemProps {
7
+ item: MediaFile;
8
+ autofit: boolean;
9
+ isApiKeySet: boolean;
10
+ isComfyEnabled: boolean;
11
+ showSideBySidePreview: boolean;
12
+ onGenerate: (id: string, customInstructions?: string) => void;
13
+ onCheckQuality: (id: string) => void;
14
+ onPreview: (id: string) => void;
15
+ onCaptionChange: (id:string, caption: string) => void;
16
+ onCustomInstructionsChange: (id: string, instructions: string) => void;
17
+ onSelectionChange: (id: string, isSelected: boolean) => void;
18
+ onOpenPreviewModal: (id: string) => void;
19
+ }
20
+
21
/**
 * Maps a quality score to the Tailwind text-colour class used for its stars.
 *
 * - no score yet  -> gray
 * - 4 and above   -> green
 * - 3 up to 4     -> yellow
 * - anything else -> red
 */
const getScoreColor = (score?: number) => {
  if (score === undefined) {
    return 'text-gray-500';
  }
  return score >= 4
    ? 'text-green-400'
    : score >= 3
      ? 'text-yellow-400'
      : 'text-red-400';
};
27
+
28
+
29
/** Number of stars in the rating scale shown on each card. */
const MAX_STARS = 5;

/**
 * Converts a raw quality score into a whole number of filled stars in
 * [0, MAX_STARS].
 *
 * `String.prototype.repeat` throws a RangeError for negative counts, so a
 * score outside 0..5 must never reach it unclamped; fractional scores are
 * rounded so filled + empty stars always add up to MAX_STARS.
 */
const toStarCount = (score: number): number => {
  if (!Number.isFinite(score)) return 0;
  return Math.min(MAX_STARS, Math.max(0, Math.round(score)));
};

/**
 * Card for a single uploaded media file.
 *
 * Shows the original media (and, when available, the ComfyUI preview next to
 * it), an editable caption, a per-item "custom instructions" field and the
 * Generate / Check Quality / Preview actions. All state lives in the parent;
 * this component only reports user intent through the `on*` callbacks.
 */
const MediaItem: React.FC<MediaItemProps> = ({
  item,
  autofit,
  isApiKeySet,
  isComfyEnabled,
  showSideBySidePreview,
  onGenerate,
  onCheckQuality,
  onPreview,
  onCaptionChange,
  onCustomInstructionsChange,
  onSelectionChange,
  onOpenPreviewModal
}) => {
  const isVideo = item.file.type.startsWith('video/');
  const textareaRef = React.useRef<HTMLTextAreaElement>(null);

  // Keep the caption box exactly as tall as its content while autofit is on;
  // otherwise hand control back to the CSS-defined height.
  React.useEffect(() => {
    const textarea = textareaRef.current;
    if (!textarea) return;
    if (autofit) {
      textarea.style.height = 'auto'; // Reset first so the box can also shrink
      textarea.style.height = `${textarea.scrollHeight}px`;
    } else {
      textarea.style.height = ''; // Revert to CSS-defined height
    }
  }, [item.caption, autofit]);

  const getStatusColor = () => {
    switch (item.status) {
      case GenerationStatus.SUCCESS: return 'border-green-500';
      case GenerationStatus.ERROR: return 'border-red-500';
      case GenerationStatus.GENERATING: return 'border-indigo-500';
      case GenerationStatus.CHECKING: return 'border-yellow-500';
      default: return 'border-gray-700';
    }
  };

  const isProcessing = item.status === GenerationStatus.GENERATING || item.status === GenerationStatus.CHECKING;
  const isPreviewing = item.comfyStatus === 'generating';
  // Held in a local so the side-by-side branch can narrow it without `!`.
  const comfyPreviewUrl = item.comfyPreviewUrl;
  const hasPreview = !!comfyPreviewUrl;
  // Treat a missing value as empty so the text input stays controlled.
  const customInstructions = item.customInstructions ?? '';
  const filledStars = item.qualityScore === undefined ? 0 : toStarCount(item.qualityScore);

  // One pane of the side-by-side view. Only the original can be a video.
  const renderMedia = (url: string, isOriginal: boolean) => {
    const isVideoFile = isOriginal && isVideo;
    return (
      <div className="relative flex-1 bg-gray-900 rounded-md overflow-hidden flex flex-col group/media shadow-inner cursor-pointer" onClick={() => onOpenPreviewModal(item.id)}>
        <div className="flex-grow flex items-center justify-center min-h-[160px] h-full">
          {isVideoFile ? (
            <video src={url} className="max-w-full max-h-full object-contain" />
          ) : (
            <img src={url} alt={item.file.name} className="max-w-full max-h-full object-contain" />
          )}
        </div>
        <div className="absolute bottom-1 left-1 px-1.5 py-0.5 bg-black/60 backdrop-blur-sm rounded text-[9px] font-black uppercase tracking-widest text-white/90 border border-white/10 opacity-80 pointer-events-none">
          {isOriginal ? 'Original' : 'Preview'}
        </div>
      </div>
    );
  };

  return (
    <div className={`bg-gray-800 rounded-lg overflow-hidden border-2 transition-all ${getStatusColor()}`}>
      <div className="relative p-2 space-y-2">
        <input
          type="checkbox"
          checked={item.isSelected}
          onChange={(e) => onSelectionChange(item.id, e.target.checked)}
          className="absolute top-4 left-4 h-6 w-6 bg-gray-900/80 backdrop-blur-sm border-gray-600 text-indigo-500 rounded focus:ring-indigo-600 z-10 cursor-pointer shadow-lg"
        />
        {item.qualityScore !== undefined && (
          <div className="absolute top-4 right-4 bg-gray-900/70 backdrop-blur-sm px-3 py-1 rounded-full text-sm font-semibold flex items-center gap-1.5 z-10 shadow-sm border border-white/5">
            <span className={`tracking-widest ${getScoreColor(item.qualityScore)}`}>
              {'★'.repeat(filledStars)}{'☆'.repeat(MAX_STARS - filledStars)}
            </span>
            <span className="text-gray-300 text-[10px]">{item.qualityScore}/5</span>
          </div>
        )}

        {hasPreview && !showSideBySidePreview && (
          <button
            onClick={() => onOpenPreviewModal(item.id)}
            className="absolute bottom-4 right-4 bg-orange-600 hover:bg-orange-500 text-white px-3 py-1.5 rounded-md text-[10px] font-black uppercase tracking-wider z-10 shadow-xl transition-all hover:scale-105 active:scale-95 border border-white/10"
          >
            View Comparison
          </button>
        )}

        <div className={`h-64 flex gap-2 ${showSideBySidePreview && hasPreview ? 'flex-row' : 'flex-col'}`}>
          {showSideBySidePreview && comfyPreviewUrl ? (
            <>
              {renderMedia(item.previewUrl, true)}
              {renderMedia(comfyPreviewUrl, false)}
            </>
          ) : (
            <div
              className="flex-grow flex items-center justify-center bg-gray-900 rounded-md overflow-hidden relative group/single cursor-pointer"
              onClick={() => onOpenPreviewModal(item.id)}
            >
              {isVideo ? (
                <video src={item.previewUrl} className="max-w-full max-h-full object-contain" />
              ) : (
                <img src={item.previewUrl} alt={item.file.name} className="max-w-full max-h-full object-contain" />
              )}
              <div className="absolute inset-0 bg-black/0 group-hover/single:bg-black/20 transition-colors flex items-center justify-center">
                <SparklesIcon className="w-8 h-8 text-white opacity-0 group-hover/single:opacity-100 transition-all scale-75 group-hover/single:scale-100" />
              </div>
            </div>
          )}
        </div>
      </div>

      <div className="p-4 space-y-4">
        <div className="flex justify-between items-start gap-2">
          <p className="text-sm text-gray-400 truncate flex-grow font-mono" title={item.file.name}>{item.file.name}</p>
          <span className="text-[10px] text-gray-600 font-bold uppercase tracking-widest whitespace-nowrap">
            {isVideo ? 'Video' : 'Image'}
          </span>
        </div>

        <textarea
          ref={textareaRef}
          value={item.caption}
          onChange={(e) => onCaptionChange(item.id, e.target.value)}
          placeholder="Generated caption will appear here..."
          rows={!autofit ? 6 : 1}
          className={`w-full p-2.5 bg-gray-900 border border-gray-700 rounded-md focus:ring-2 focus:ring-indigo-500 focus:border-indigo-500 transition-all resize-none overflow-hidden text-[13px] leading-relaxed text-gray-200 ${!autofit ? 'h-32' : ''}`}
        />

        <div className="flex flex-col gap-2">
          <input
            type="text"
            placeholder="Custom instructions for refinement..."
            value={customInstructions}
            onChange={(e) => onCustomInstructionsChange(item.id, e.target.value)}
            className="w-full p-2.5 bg-gray-700 border border-gray-600 rounded-md focus:ring-2 focus:ring-indigo-500 focus:border-indigo-500 transition-all text-xs"
          />
          <div className="flex flex-wrap gap-2">
            <button
              onClick={() => onGenerate(item.id, item.customInstructions)}
              disabled={isProcessing || !isApiKeySet}
              className="flex-1 flex items-center justify-center px-3 py-2 bg-green-600 text-white rounded-md hover:bg-green-700 disabled:bg-gray-700 disabled:cursor-not-allowed transition-all text-[11px] font-black uppercase tracking-wider shadow-lg shadow-green-900/10"
            >
              {isProcessing ? (
                <LoaderIcon className="w-4 h-4 animate-spin mr-2" />
              ) : (
                item.customInstructions ? <WandIcon className="w-4 h-4 mr-2" /> : <SparklesIcon className="w-4 h-4 mr-2" />
              )}
              <span>
                {item.status === GenerationStatus.GENERATING ? 'Working...' :
                 item.status === GenerationStatus.CHECKING ? 'Checking...' :
                 item.customInstructions ? 'Refine' : 'Generate'}
              </span>
            </button>

            <button
              onClick={() => onCheckQuality(item.id)}
              disabled={isProcessing || !isApiKeySet || !item.caption}
              className="flex-1 flex items-center justify-center px-3 py-2 bg-blue-600 text-white rounded-md hover:bg-blue-700 disabled:bg-gray-700 disabled:cursor-not-allowed transition-all text-[11px] font-black uppercase tracking-wider shadow-lg shadow-blue-900/10"
            >
              <CheckCircleIcon className="w-4 h-4 mr-2" />
              <span>Check Quality</span>
            </button>

            {isComfyEnabled && (
              <button
                onClick={() => onPreview(item.id)}
                disabled={isPreviewing || !item.caption}
                className={`flex-shrink-0 flex items-center justify-center px-4 py-2 text-white rounded-md transition-all text-[11px] font-black uppercase tracking-wider shadow-lg ${item.comfyStatus === 'error' ? 'bg-red-600 hover:bg-red-700 shadow-red-900/10' : 'bg-orange-600 hover:bg-orange-700 shadow-orange-900/10'} disabled:bg-gray-500 disabled:cursor-not-allowed`}
                title={item.comfyErrorMessage || "Generate preview with ComfyUI"}
              >
                {isPreviewing ? (
                  <LoaderIcon className="w-4 h-4 animate-spin mr-2" />
                ) : (
                  <SparklesIcon className="w-4 h-4 mr-2" />
                )}
                <span>Preview</span>
              </button>
            )}
          </div>

          {isComfyEnabled && item.comfyStatus === 'error' && (
            <div className="bg-red-900/20 p-2 rounded border border-red-500/30">
              <p className="text-[10px] text-red-400 leading-tight">
                <span className="font-bold uppercase tracking-tighter">Bridge/Server Error:</span> {item.comfyErrorMessage}
              </p>
            </div>
          )}
          {isComfyEnabled && item.comfyStatus === 'generating' && (
            <div className="bg-orange-900/20 p-2 rounded border border-orange-500/30 animate-pulse">
              <p className="text-[10px] text-orange-400 font-bold uppercase text-center tracking-widest">
                Queueing in ComfyUI...
              </p>
            </div>
          )}
        </div>

        {item.status === GenerationStatus.ERROR && (
          <p className="text-[11px] text-red-400 mt-1 italic font-medium leading-tight">
            <span className="font-black uppercase tracking-tighter mr-1">Error:</span> {item.errorMessage}
          </p>
        )}
      </div>
    </div>
  );
};
232
+
233
+ export default MediaItem;
constants/defaultWorkflow.ts ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/**
 * Built-in ComfyUI workflow in API ("prompt") format.
 *
 * Keys are node ids. A two-element array such as `["66", 0]` is a link:
 * `[source node id, source output slot]`. The two `CLIPTextEncode` nodes
 * ("6" positive, "7" negative) ship with empty text.
 */
export const DEFAULT_COMFY_WORKFLOW = {
  "3": {
    inputs: {
      seed: 912870720080529,
      steps: 8,
      cfg: 1,
      sampler_name: "euler",
      scheduler: "simple",
      denoise: 1,
      model: ["66", 0],
      positive: ["6", 0],
      negative: ["7", 0],
      latent_image: ["58", 0],
    },
    class_type: "KSampler",
    _meta: { title: "KSampler" },
  },
  "6": {
    inputs: {
      text: "",
      clip: ["38", 0],
    },
    class_type: "CLIPTextEncode",
    _meta: { title: "CLIP Text Encode (Positive Prompt)" },
  },
  "7": {
    inputs: {
      text: "",
      clip: ["38", 0],
    },
    class_type: "CLIPTextEncode",
    _meta: { title: "CLIP Text Encode (Negative Prompt)" },
  },
  "8": {
    inputs: {
      samples: ["3", 0],
      vae: ["39", 0],
    },
    class_type: "VAEDecode",
    _meta: { title: "VAE Decode" },
  },
  "37": {
    inputs: {
      unet_name: "qwen_image_fp8_e4m3fn.safetensors",
      weight_dtype: "default",
    },
    class_type: "UNETLoader",
    _meta: { title: "Load Diffusion Model" },
  },
  "38": {
    inputs: {
      clip_name: "qwen_2.5_vl_7b_fp8_scaled.safetensors",
      type: "qwen_image",
      device: "default",
    },
    class_type: "CLIPLoader",
    _meta: { title: "Load CLIP" },
  },
  "39": {
    inputs: {
      vae_name: "qwen_image_vae.safetensors",
    },
    class_type: "VAELoader",
    _meta: { title: "Load VAE" },
  },
  "58": {
    inputs: {
      width: 1024,
      height: 1024,
      batch_size: 1,
    },
    class_type: "EmptySD3LatentImage",
    _meta: { title: "EmptySD3LatentImage" },
  },
  "60": {
    inputs: {
      filename_prefix: "ComfyUI",
      images: ["8", 0],
    },
    class_type: "SaveImage",
    _meta: { title: "Save Image" },
  },
  "66": {
    inputs: {
      shift: 3,
      model: ["73", 0],
    },
    class_type: "ModelSamplingAuraFlow",
    _meta: { title: "ModelSamplingAuraFlow" },
  },
  "73": {
    inputs: {
      lora_name: "Qwen-Image-Lightning-8steps-V2.0.safetensors",
      strength_model: 1,
      model: ["37", 0],
    },
    class_type: "LoraLoaderModelOnly",
    _meta: { title: "LoraLoaderModelOnly" },
  },
};
index.html ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <link rel="icon" type="image/svg+xml" href="/vite.svg" />
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
+ <title>LoRA Caption Assistant</title>
8
+ <script src="https://cdn.tailwindcss.com"></script>
9
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/jszip/3.10.1/jszip.min.js"></script>
10
+ <script type="importmap">
11
+ {
12
+ "imports": {
13
+ "react": "https://aistudiocdn.com/react@^19.2.0",
14
+ "react-dom/": "https://aistudiocdn.com/react-dom@^19.2.0/",
15
+ "react/": "https://aistudiocdn.com/react@^19.2.0/",
16
+ "@google/genai": "https://aistudiocdn.com/@google/genai@^1.30.0",
17
+ "@vitejs/plugin-react": "https://aistudiocdn.com/@vitejs/plugin-react@^5.1.1",
18
+ "vite": "https://aistudiocdn.com/vite@^7.2.2"
19
+ }
20
+ }
21
+ </script>
22
+ <link rel="stylesheet" href="/index.css">
23
+ </head>
24
+ <body class="bg-gray-900 text-gray-100">
25
+ <div id="root"></div>
26
+ <script type="module" src="/index.tsx"></script>
27
+ </body>
28
+ </html>
index.tsx ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import React from 'react';
3
+ import ReactDOM from 'react-dom/client';
4
+ import App from './App';
5
+
6
// Application bootstrap: find the #root mount point declared in index.html
// and render <App /> into it. Fail loudly if the mount point is missing.
const rootElement = document.getElementById('root');
if (rootElement === null) {
  throw new Error("Could not find root element to mount to");
}

ReactDOM.createRoot(rootElement).render(
  <React.StrictMode>
    <App />
  </React.StrictMode>
);
metadata.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "LoRA Caption Assistant",
3
+ "description": "High-quality image/video captioning for Wan 2.2 LoRAs using Gemini 3 Pro.",
4
+ "requestFramePermissions": [
5
+ "camera"
6
+ ]
7
+ }
package.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ {
3
+ "name": "lora-caption-assistant",
4
+ "version": "1.0.0",
5
+ "type": "module",
6
+ "scripts": {
7
+ "dev": "vite",
8
+ "build": "vite build",
9
+ "preview": "vite preview"
10
+ },
11
+ "dependencies": {
12
+ "@google/genai": "^1.30.0",
13
+ "react": "^19.2.0",
14
+ "react-dom": "^19.2.0"
15
+ },
16
+ "devDependencies": {
17
+ "@types/node": "^20.11.0",
18
+ "@types/react": "^19.2.0",
19
+ "@types/react-dom": "^19.2.0",
20
+ "@vitejs/plugin-react": "^4.3.1",
21
+ "typescript": "^5.5.3",
22
+ "vite": "^5.4.1"
23
+ }
24
+ }
services/comfyService.ts ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Service for interacting with local ComfyUI instances
3
+ */
4
+
5
/**
 * Best-effort conversion of a ComfyUI *UI* workflow (the export that has a
 * `nodes` array and a `links` table) into the node-id-keyed object that the
 * `/prompt` endpoint expects.
 *
 * - Linked inputs become `[sourceNodeId, sourceSlot]` pairs under the input's
 *   name.
 * - Widget values are copied under placeholder keys `_widget_<index>`: the
 *   real input names are not known without the node definitions, so this part
 *   of the mapping is inherently lossy.
 *
 * @param uiWorkflow Parsed workflow JSON.
 * @returns The converted prompt, or `uiWorkflow` itself when it has no
 *          `nodes` array (already API format, or not a workflow at all).
 */
const convertUiToApi = (uiWorkflow: any): any => {
  if (!uiWorkflow || !Array.isArray(uiWorkflow.nodes)) {
    return uiWorkflow; // Already in API format or unknown
  }

  // Index the link table once instead of scanning it for every input.
  // link format: [id, origin_id, origin_slot, target_id, target_slot, type]
  const linksById = new Map<unknown, any[]>();
  if (Array.isArray(uiWorkflow.links)) {
    for (const link of uiWorkflow.links) {
      // First entry wins, matching what a linear search would return.
      if (Array.isArray(link) && !linksById.has(link[0])) {
        linksById.set(link[0], link);
      }
    }
  }

  const apiPrompt: any = {};

  for (const node of uiWorkflow.nodes) {
    const inputs: any = {};

    if (Array.isArray(node.inputs)) {
      for (const input of node.inputs) {
        // Unconnected inputs carry `link: null`; compare against null rather
        // than truthiness so a link id of 0 is not mistaken for "no link".
        if (input?.link == null) continue;
        const link = linksById.get(input.link);
        if (link) {
          inputs[input.name] = [String(link[1]), link[2]];
        }
      }
    }

    if (Array.isArray(node.widgets_values)) {
      node.widgets_values.forEach((value: any, index: number) => {
        inputs[`_widget_${index}`] = value;
      });
    }

    apiPrompt[String(node.id)] = {
      class_type: node.type,
      inputs,
      _meta: { title: node.title || node.type }
    };
  }

  return apiPrompt;
};
56
+
57
/**
 * Queues `workflow` on a ComfyUI server with the given prompt text injected,
 * waits for the first image output and returns a URL from which it can be
 * loaded.
 *
 * Steps:
 *  1. Deep-copy the workflow and write `promptText`, `seed` and `steps` into
 *     the first matching text-encode / sampler node (UI-format workflows are
 *     then passed through `convertUiToApi`).
 *  2. POST it to `<server>/prompt`, or to this origin's `/comfy-bridge/prompt`
 *     with the real server in the `x-bridge-target` header when
 *     `useSecureBridge` is set.
 *  3. Poll the history endpoint every 3 s, up to 60 times, until an output
 *     node reports an image.
 *
 * @param serverUrl       Base URL of the ComfyUI server (trailing slashes are ignored).
 * @param workflow        Workflow JSON in API format or UI format (with a `nodes` array).
 * @param promptText      Text written into the positive `CLIPTextEncode` node.
 * @param seed            Sampler seed; `-1` keeps the workflow's own value.
 * @param steps           Sampler step count; `-1` keeps the workflow's own value.
 * @param useSecureBridge Route every request through the same-origin bridge.
 * @param signal          Optional abort signal for the requests and the polling loop.
 * @returns URL of the first generated image.
 * @throws Error on connection failure, non-2xx response, missing prompt id,
 *         abort, or when no image appears before polling gives up.
 */
export const sendComfyPrompt = async (
  serverUrl: string,
  workflow: any,
  promptText: string,
  seed: number,
  steps: number,
  useSecureBridge: boolean = false,
  signal?: AbortSignal
): Promise<string> => {
  const POLL_INTERVAL_MS = 3000;
  const MAX_POLLS = 60;
  const SAMPLER_TYPES = ['KSampler', 'KSamplerAdvanced'];

  const baseUrl = serverUrl.replace(/\/+$/, '');
  console.log(`[ComfyUI] Starting preview. Bridge: ${useSecureBridge}, Target: ${baseUrl}`);

  // A text-encode node counts as "the prompt" when its title says positive,
  // or at least does not say negative.
  const isPositiveTitle = (rawTitle: unknown): boolean => {
    const title = String(rawTitle || "").toLowerCase();
    return title.includes('positive') || !title.includes('negative');
  };

  // 1. Prepare Workflow (always on a copy so the caller's object is untouched)
  let apiPrompt: any = JSON.parse(JSON.stringify(workflow));

  if (Array.isArray(apiPrompt.nodes)) {
    console.log("[ComfyUI] Standard UI format detected. Attempting internal mapping...");

    const promptNode = apiPrompt.nodes.find(
      (n: any) => n.type === 'CLIPTextEncode' && isPositiveTitle(n.title)
    );
    const samplerNode = apiPrompt.nodes.find((n: any) => SAMPLER_TYPES.includes(n.type));

    // In UI format, prompt is usually the first widget
    if (promptNode?.widgets_values) promptNode.widgets_values[0] = promptText;
    if (samplerNode?.widgets_values) {
      if (seed !== -1) samplerNode.widgets_values[0] = seed;
      if (steps !== -1) samplerNode.widgets_values[2] = steps;
    }

    // The /prompt endpoint REQUIRES API format, so UI format must be converted.
    apiPrompt = convertUiToApi(apiPrompt);
  } else {
    // Identify nodes in API format: first match of each kind wins.
    let promptNodeId = '';
    let samplerNodeId = '';
    for (const id in apiPrompt) {
      const node = apiPrompt[id];
      const type = node?.class_type;
      if (!promptNodeId && type === 'CLIPTextEncode' && isPositiveTitle(node._meta?.title)) promptNodeId = id;
      if (!samplerNodeId && SAMPLER_TYPES.includes(type)) samplerNodeId = id;
    }
    if (promptNodeId) apiPrompt[promptNodeId].inputs.text = promptText;
    if (samplerNodeId) {
      if (seed !== -1) apiPrompt[samplerNodeId].inputs.seed = seed;
      if (steps !== -1) apiPrompt[samplerNodeId].inputs.steps = steps;
    }
  }

  // 2. Determine Endpoint
  const bridgeBase = useSecureBridge ? `${window.location.origin}/comfy-bridge` : '';
  const fetchUrl = useSecureBridge ? `${bridgeBase}/prompt` : `${baseUrl}/prompt`;
  const fetchHeaders: Record<string, string> = { 'Content-Type': 'application/json' };
  if (useSecureBridge) {
    fetchHeaders['x-bridge-target'] = baseUrl;
  }

  // 3. Send Request
  let response: Response;
  try {
    response = await fetch(fetchUrl, {
      method: 'POST',
      headers: fetchHeaders,
      body: JSON.stringify({ prompt: apiPrompt }),
      signal
    });
  } catch (err: unknown) {
    const details = err as { name?: unknown; message?: unknown } | null | undefined;
    // Let aborts propagate untouched so callers can tell them from failures.
    if (details?.name === 'AbortError') throw err;
    throw new Error(`Connection failed: ${details?.message}. Ensure your ComfyUI server or Bridge is reachable.`);
  }

  if (!response.ok) {
    const errText = await response.text();
    throw new Error(`Server Error (${response.status}): ${errText.substring(0, 100)}...`);
  }

  const { prompt_id } = await response.json();
  if (!prompt_id) {
    // Without an id we would poll ".../history/undefined" until the timeout.
    throw new Error("Server response did not include a prompt_id.");
  }

  // 4. Poll
  const historyPath = `/history/${encodeURIComponent(prompt_id)}`;
  const pollUrl = useSecureBridge ? `${bridgeBase}${historyPath}` : `${baseUrl}${historyPath}`;
  const pollHeaders: HeadersInit = useSecureBridge ? { 'x-bridge-target': baseUrl } : {};

  for (let i = 0; i < MAX_POLLS; i++) {
    if (signal?.aborted) throw new Error("Aborted");

    const hRes = await fetch(pollUrl, { headers: pollHeaders, signal });
    if (hRes.ok) {
      const history = await hRes.json();
      const outputs = history?.[prompt_id]?.outputs;
      for (const nodeId in outputs) {
        const img = outputs[nodeId]?.images?.[0];
        if (!img) continue;

        // File names and sub-folders may contain spaces, '&', '#', ... so
        // every query value must be percent-encoded.
        const query =
          `filename=${encodeURIComponent(img.filename)}` +
          `&subfolder=${encodeURIComponent(img.subfolder ?? '')}` +
          `&type=${encodeURIComponent(img.type ?? 'output')}`;

        return useSecureBridge
          ? `${bridgeBase}/view?${query}&target_base=${encodeURIComponent(baseUrl)}`
          : `${baseUrl}/view?${query}`;
      }
    }
    await new Promise(r => setTimeout(r, POLL_INTERVAL_MS));
  }

  throw new Error("Preview generation timed out.");
};
services/geminiService.ts ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { GoogleGenAI, GenerateContentResponse } from "@google/genai";
2
+
3
/**
 * Invokes `apiCall` and retries it with exponential backoff while the failure
 * looks transient.
 *
 * A failure is treated as transient only when it is an `Error` whose message
 * contains "503" or (case-insensitively) "overloaded". Any other failure, or a
 * transient one on the final permitted attempt, is rethrown unchanged.
 *
 * @param apiCall      Factory producing a fresh promise for every attempt.
 * @param maxRetries   Total number of attempts allowed (the first call counts).
 * @param initialDelay Base backoff in milliseconds; doubles per attempt, plus up to 500ms of jitter.
 * @returns The value resolved by the first successful attempt.
 */
const withRetry = async <T>(
  apiCall: () => Promise<T>,
  maxRetries: number = 3,
  initialDelay: number = 1000
): Promise<T> => {
  const looksTransient = (failure: unknown): boolean =>
    failure instanceof Error &&
    (failure.message.includes("503") || failure.message.toLowerCase().includes("overloaded"));

  for (let attempt = 1; ; attempt++) {
    try {
      return await apiCall();
    } catch (error) {
      // Give up on permanent failures and once the attempt budget is spent.
      if (!looksTransient(error) || attempt >= maxRetries) {
        throw error;
      }
      const delay = initialDelay * 2 ** (attempt - 1) + Math.random() * 500;
      console.warn(`Attempt ${attempt} failed. Retrying in ${delay.toFixed(0)}ms...`);
      await new Promise(resolve => setTimeout(resolve, delay));
    }
  }
};
28
+
29
/**
 * Reads a file and wraps it as an inline-data part for a Gemini request.
 *
 * @param file The media file to encode.
 * @returns `{ inlineData: { data, mimeType } }` where `data` is the base64
 *          payload of the file (without the `data:<mime>;base64,` prefix) and
 *          `mimeType` is `file.type`.
 * @throws If the browser cannot read the file or the read is aborted. The
 *         previous implementation never settled in those cases, leaving the
 *         caller awaiting forever.
 */
const fileToGenerativePart = async (file: File) => {
  const base64Data = await new Promise<string>((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => {
      const { result } = reader;
      if (typeof result !== 'string') {
        reject(new Error(`Failed to read "${file.name}" as a data URL.`));
        return;
      }
      // A data URL looks like "data:<mime>;base64,<payload>"; keep only the payload.
      const commaIndex = result.indexOf(',');
      resolve(commaIndex === -1 ? '' : result.slice(commaIndex + 1));
    };
    reader.onerror = () => {
      reject(reader.error ?? new Error(`Failed to read "${file.name}".`));
    };
    reader.onabort = () => {
      reject(new Error(`Reading "${file.name}" was aborted.`));
    };
    reader.readAsDataURL(file);
  });

  return {
    inlineData: { data: base64Data, mimeType: file.type },
  };
};
43
+
44
/**
 * Builds the captioning prompt sent to Gemini.
 *
 * @param triggerWord               Word the caption must start with.
 * @param customInstructions        Optional extra instructions appended after the rules;
 *                                  ignored when empty or whitespace-only.
 * @param isCharacterTaggingEnabled When true (and a show name is given) adds rule 6,
 *                                  asking for `char_[name]` tags.
 * @param characterShowName         Show/series used for character recognition; ignored
 *                                  when empty or whitespace-only so the prompt never
 *                                  asks for characters from a show named "".
 * @returns The complete prompt text.
 */
const constructPrompt = (
  triggerWord: string,
  customInstructions?: string,
  isCharacterTaggingEnabled?: boolean,
  characterShowName?: string
): string => {
  let basePrompt = `You are an expert captioner for AI model training data. Your task is to describe the provided image/video in detail for a style LoRA. Follow these rules strictly:
1. Start the caption with the trigger word: "${triggerWord}".
2. Describe EVERYTHING visible: characters, clothing, actions, background, objects, lighting, and camera angle.
3. Be objective and factual.
4. DO NOT mention art styles or generic animation terms like "anime" or "cartoon".
5. Write as a single, continuous paragraph.`;

  // Mirror the Qwen prompt builder: a blank show name must not enable tagging.
  const showName = characterShowName?.trim();
  if (isCharacterTaggingEnabled && showName) {
    basePrompt += `\n6. Identify characters from the show/series "${showName}" and append tags at the end of the caption, separated by commas. The format for each tag must be "char_[charactername]" (e.g., ", char_simon, char_kamina"). If no characters are recognized, do not add tags.`;
  }

  if (customInstructions?.trim()) {
    return `${basePrompt}\n\nAdditional instructions: ${customInstructions}`;
  }
  return basePrompt;
};
66
+
67
/**
 * Generates a training caption for a media file with Gemini.
 *
 * @param file                      Image/video to caption.
 * @param triggerWord               Word the caption must start with.
 * @param customInstructions        Optional extra prompt instructions.
 * @param isCharacterTaggingEnabled Whether to request `char_[name]` tags.
 * @param characterShowName         Show/series used for character recognition.
 * @param signal                    Optional signal used to cancel the request.
 * @param apiKeyOverride            API key that takes precedence over `process.env.API_KEY`.
 * @param model                     Gemini model id.
 * @returns The trimmed caption text.
 * @throws If no API key is available, the request fails (after retries), the
 *         signal is aborted, or the response contains no text.
 */
export const generateCaption = async (
  file: File,
  triggerWord: string,
  customInstructions?: string,
  isCharacterTaggingEnabled?: boolean,
  characterShowName?: string,
  signal?: AbortSignal,
  apiKeyOverride?: string,
  model: string = 'gemini-3-pro-preview'
): Promise<string> => {
  const apiKey = apiKeyOverride || process.env.API_KEY;
  if (!apiKey) throw new Error("API Key is missing. Please enter your Gemini API key in the Global Settings.");

  const ai = new GoogleGenAI({ apiKey });
  const imagePart = await fileToGenerativePart(file);
  const prompt = constructPrompt(triggerWord, customInstructions, isCharacterTaggingEnabled, characterShowName);

  const apiCall = () => ai.models.generateContent({
    model: model,
    contents: { parts: [imagePart, { text: prompt }] },
    // The SDK's cancellation option is `abortSignal`; the former `signal` key
    // (hidden behind an `as any` cast) was ignored, so requests were never cancelled.
    config: { abortSignal: signal }
  });

  const response: GenerateContentResponse = await withRetry(apiCall);

  // Covers an abort that lands after the response has already arrived.
  if (signal?.aborted) {
    const abortError = new Error("AbortError");
    // Keep the historical message, but also expose the conventional name so
    // `err.name === 'AbortError'` checks recognise it.
    abortError.name = "AbortError";
    throw abortError;
  }

  if (response.text) {
    return response.text.trim();
  }
  throw new Error("No caption text returned from Gemini.");
};
99
+
100
/**
 * Asks Gemini to rewrite an existing caption according to user instructions,
 * using the media file as visual reference.
 *
 * @param file                   Image/video the caption describes.
 * @param currentCaption         Caption text to refine.
 * @param refinementInstructions What the user wants changed.
 * @param signal                 Optional signal used to cancel the request.
 * @param apiKeyOverride         API key that takes precedence over `process.env.API_KEY`.
 * @param model                  Gemini model id.
 * @returns The trimmed refined caption.
 * @throws If no API key is available, the request fails (after retries), the
 *         signal is aborted, or the response contains no text.
 */
export const refineCaption = async (
  file: File,
  currentCaption: string,
  refinementInstructions: string,
  signal?: AbortSignal,
  apiKeyOverride?: string,
  model: string = 'gemini-3-pro-preview'
): Promise<string> => {
  const apiKey = apiKeyOverride || process.env.API_KEY;
  if (!apiKey) throw new Error("API Key is missing.");

  const ai = new GoogleGenAI({ apiKey });
  const imagePart = await fileToGenerativePart(file);
  const prompt = `You are an expert editor for LoRA training data.
Refine the provided caption based on the visual information and the user's refinement instructions.
Maintain the continuous paragraph format and ensure the trigger word is preserved.

CURRENT CAPTION: "${currentCaption}"
REFINEMENT INSTRUCTIONS: "${refinementInstructions}"

Output only the refined caption.`;

  const apiCall = () => ai.models.generateContent({
    model: model,
    contents: { parts: [imagePart, { text: prompt }] },
    // The SDK's cancellation option is `abortSignal`; the former `signal` key
    // (hidden behind an `as any` cast) was ignored, so requests were never cancelled.
    config: { abortSignal: signal }
  });

  const response: GenerateContentResponse = await withRetry(apiCall);

  // Covers an abort that lands after the response has already arrived.
  if (signal?.aborted) {
    const abortError = new Error("AbortError");
    // Keep the historical message, but also expose the conventional name so
    // `err.name === 'AbortError'` checks recognise it.
    abortError.name = "AbortError";
    throw abortError;
  }

  if (response.text) {
    return response.text.trim();
  }
  throw new Error("No refined text returned.");
};
136
+
137
/**
 * Asks Gemini to rate how well a caption matches its media file.
 *
 * This is a best-effort check: any failure of the request itself is logged and
 * reported as a score of 0 rather than thrown. A missing API key or an
 * unreadable file still throws, because those happen before the guarded call.
 *
 * @param file           Image/video the caption describes.
 * @param caption        Caption text to evaluate.
 * @param signal         Optional signal used to cancel the request.
 * @param apiKeyOverride API key that takes precedence over `process.env.API_KEY`.
 * @param model          Gemini model id.
 * @returns The first integer found in the model's reply (the prompt asks for
 *          1-5), or 0 when the reply has no digits or the request failed.
 */
export const checkCaptionQuality = async (
  file: File,
  caption: string,
  signal?: AbortSignal,
  apiKeyOverride?: string,
  model: string = 'gemini-3-pro-preview'
): Promise<number> => {
  const apiKey = apiKeyOverride || process.env.API_KEY;
  if (!apiKey) throw new Error("API Key is missing.");

  const ai = new GoogleGenAI({ apiKey });
  const imagePart = await fileToGenerativePart(file);
  const prompt = `Evaluate the following caption for accuracy and detail based on the image. Respond with ONLY an integer from 1 to 5.\nCaption: "${caption}"`;

  try {
    const apiCall = () => ai.models.generateContent({
      model: model,
      contents: { parts: [imagePart, { text: prompt }] },
      // The SDK's cancellation option is `abortSignal`; the former `signal` key
      // (hidden behind an `as any` cast) was ignored, so requests were never cancelled.
      config: { abortSignal: signal }
    });
    const response: GenerateContentResponse = await withRetry(apiCall);
    const scoreText = response.text?.trim() || '0';
    // Tolerate replies such as "Score: 4" by taking the first run of digits.
    const score = parseInt(scoreText.match(/\d+/)?.[0] || '0', 10);
    return score;
  } catch (error) {
    console.error("Quality check failed:", error);
    return 0;
  }
};
services/qwenService.ts ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Service for interacting with Qwen-VL via OpenAI-compatible endpoints.
3
+ * This supports OpenRouter, Ollama, vLLM, etc.
4
+ */
5
+
6
/**
 * Reads a file into a complete data URL (including the
 * `data:<mime>;base64,` prefix), the form expected by OpenAI-compatible
 * `image_url` content parts.
 *
 * @param file The file to encode.
 * @returns The data URL string.
 * @throws An `Error` when the file cannot be read. Previously the raw progress
 *         event was used as the rejection value, so callers reading
 *         `.message` got `undefined`.
 */
const fileToBase64 = (file: File): Promise<string> => {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => {
      if (typeof reader.result === 'string') {
        resolve(reader.result); // This includes the data:image/png;base64, prefix
      } else {
        reject(new Error('Failed to convert file to base64'));
      }
    };
    reader.onerror = () => {
      reject(reader.error ?? new Error('Failed to convert file to base64'));
    };
    reader.readAsDataURL(file);
  });
};
20
+
21
/**
 * Samples evenly spaced still frames from a video file as JPEG data URLs.
 *
 * The video is loaded into an off-screen `<video>` element, seeked to the
 * middle of each of `numberOfFrames` equal time slices, and drawn to a canvas.
 *
 * @param videoFile      The video to sample.
 * @param numberOfFrames How many frames to extract. A non-positive or
 *                       non-finite count yields an empty array without
 *                       touching the DOM.
 * @returns JPEG data URLs (quality 0.8) in playback order.
 * @throws If the video cannot be loaded, its duration is unknown, a canvas
 *         context cannot be created, or processing exceeds 60 seconds.
 */
const extractFramesFromVideo = async (videoFile: File, numberOfFrames: number): Promise<string[]> => {
  // Nothing to sample; an infinite count would otherwise loop until the timeout.
  if (!Number.isFinite(numberOfFrames) || numberOfFrames <= 0) {
    return [];
  }

  return new Promise<string[]>((resolve, reject) => {
    const video = document.createElement('video');
    video.preload = 'metadata';
    video.muted = true;
    video.playsInline = true;
    const url = URL.createObjectURL(videoFile);
    const frames: string[] = [];
    let settled = false;

    // Single exit point: releases every resource exactly once, whichever of
    // success, failure or timeout happens first.
    const finish = (outcome: () => void) => {
      if (settled) return;
      settled = true;
      clearTimeout(timeout);
      video.onloadeddata = null;
      video.onerror = null;
      URL.revokeObjectURL(url);
      video.src = "";
      outcome();
    };

    const timeout = setTimeout(() => {
      finish(() => reject(new Error("Video processing timed out")));
    }, 60000);

    // Seeks and waits for the frame to be available; a media error during the
    // seek fails fast instead of leaving the seek pending.
    const seekTo = (time: number): Promise<void> =>
      new Promise<void>((frameResolve, frameReject) => {
        const detach = () => {
          video.removeEventListener('seeked', onSeeked);
          video.removeEventListener('error', onSeekError);
        };
        const onSeeked = () => {
          detach();
          frameResolve();
        };
        const onSeekError = () => {
          detach();
          frameReject(new Error("Failed to load video file"));
        };
        video.addEventListener('seeked', onSeeked);
        video.addEventListener('error', onSeekError);
        video.currentTime = time;
      });

    video.onloadeddata = async () => {
      const duration = video.duration;
      const canvas = document.createElement('canvas');
      const ctx = canvas.getContext('2d');
      if (!ctx) {
        finish(() => reject(new Error("Could not create canvas context")));
        return;
      }
      if (!Number.isFinite(duration)) {
        // Assigning a non-finite currentTime would throw an opaque TypeError.
        finish(() => reject(new Error("Video duration is unavailable")));
        return;
      }
      canvas.width = video.videoWidth;
      canvas.height = video.videoHeight;
      const step = duration / numberOfFrames;
      try {
        for (let i = 0; i < numberOfFrames; i++) {
          // Sample the midpoint of each slice to avoid the very first/last frame.
          await seekTo((step * i) + (step / 2));
          // Stop working once the timeout (or an error) has already settled the result.
          if (settled) return;
          ctx.drawImage(video, 0, 0);
          frames.push(canvas.toDataURL('image/jpeg', 0.8));
        }
        finish(() => resolve(frames));
      } catch (e) {
        finish(() => reject(e));
      }
    };

    video.onerror = () => {
      finish(() => reject(new Error("Failed to load video file")));
    };

    video.src = url;
  });
};
80
+
81
/**
 * Assembles the captioning prompt for Qwen-VL style models.
 *
 * The prompt always contains the five base rules. Rule 6 (character tags) is
 * added only when tagging is enabled and the show name is non-blank, and the
 * user's custom instructions, when present, are appended last.
 *
 * @param triggerWord               Word the caption must start with.
 * @param customInstructions        Optional user instructions appended verbatim.
 * @param isCharacterTaggingEnabled Whether to request `char_[name]` tags.
 * @param characterShowName         Show used for character recognition.
 * @returns The complete prompt text.
 */
const constructPrompt = (
  triggerWord: string,
  customInstructions?: string,
  isCharacterTaggingEnabled?: boolean,
  characterShowName?: string
): string => {
  const ruleLines = [
    `You are an expert captioner for AI model training data. Your task is to describe the provided image/video in detail for a style LoRA. Follow these rules strictly:`,
    `1. Start the caption with the trigger word: "${triggerWord}".`,
    `2. Describe EVERYTHING visible: characters, clothing, actions, background, objects, lighting, and camera angle.`,
    `3. Be objective and factual.`,
    `4. DO NOT mention the art style, "anime", "cartoon", "illustration", "2d", or "animation".`,
    `5. Write the description as a single, continuous paragraph.`,
  ];

  const hasShowName = typeof characterShowName === 'string' && characterShowName.trim() !== '';
  if (isCharacterTaggingEnabled && hasShowName) {
    ruleLines.push(
      `6. After the description, identify any characters from the show "${characterShowName}" and append their tags to the very end of the caption, separated by commas. The format for each tag must be "char_[charactername]" (e.g., ", char_simon, char_kamina"). If no characters are recognized, add no tags.`
    );
  }

  const rulesText = ruleLines.join('\n');
  return customInstructions
    ? `${rulesText}\n\nIMPORTANT USER INSTRUCTIONS:\n${customInstructions}`
    : rulesText;
};
103
+
104
/**
 * Generates a training caption through an OpenAI-compatible
 * `/chat/completions` endpoint.
 *
 * Images are sent as one data-URL `image_url` part; videos are sent as
 * `videoFrameCount` sampled frames.
 *
 * @param apiKey                    Bearer token; no `Authorization` header is sent when empty.
 * @param baseUrl                   Endpoint base URL, or a full `/chat/completions` URL.
 * @param model                     Model id; falls back to `Qwen/Qwen2.5-VL-7B-Instruct` when empty.
 * @param file                      Image/video to caption.
 * @param triggerWord               Word the caption must start with.
 * @param customInstructions        Optional extra prompt instructions.
 * @param isCharacterTaggingEnabled Whether to request `char_[name]` tags.
 * @param characterShowName         Show used for character recognition.
 * @param videoFrameCount           Number of frames sampled from video input.
 * @param signal                    Optional signal used to cancel the HTTP request.
 * @returns The trimmed caption, or `""` when the response carries no message content.
 * @throws If `baseUrl` is empty or the server answers with a non-2xx status.
 */
export const generateCaptionQwen = async (
  apiKey: string,
  baseUrl: string,
  model: string,
  file: File,
  triggerWord: string,
  customInstructions?: string,
  isCharacterTaggingEnabled?: boolean,
  characterShowName?: string,
  videoFrameCount: number = 8,
  signal?: AbortSignal
): Promise<string> => {
  if (!baseUrl) throw new Error("Local Endpoint URL is required for Qwen.");

  // Accept either a bare base URL or one that already names the completions route.
  const endpoint = baseUrl.includes('/chat/completions')
    ? baseUrl
    : `${baseUrl.replace(/\/+$/, '')}/chat/completions`;

  const prompt = constructPrompt(triggerWord, customInstructions, isCharacterTaggingEnabled, characterShowName);

  const mediaUrls: string[] = file.type.startsWith('video/')
    ? await extractFramesFromVideo(file, videoFrameCount)
    : [await fileToBase64(file)];

  const contentParts: any[] = [
    { type: "text", text: prompt },
    ...mediaUrls.map(url => ({ type: "image_url", image_url: { url } })),
  ];

  const headers: Record<string, string> = { "Content-Type": "application/json" };
  if (apiKey) {
    headers["Authorization"] = `Bearer ${apiKey}`;
  }

  const requestBody = JSON.stringify({
    model: model || 'Qwen/Qwen2.5-VL-7B-Instruct',
    messages: [{ role: "user", content: contentParts }],
    max_tokens: 1000,
    temperature: 0.2
  });

  const response = await fetch(endpoint, { method: "POST", headers, body: requestBody, signal });
  if (!response.ok) {
    throw new Error(`API Error: ${response.status}`);
  }

  const data = await response.json();
  return data.choices?.[0]?.message?.content?.trim() || "";
};
143
+
144
/**
 * Asks an OpenAI-compatible vision model to rewrite a caption according to
 * user instructions, using the media file as visual reference.
 *
 * @param apiKey                 Bearer token; no `Authorization` header is sent when empty.
 * @param baseUrl                Endpoint base URL, or a full `/chat/completions` URL.
 * @param model                  Model id; falls back to `Qwen/Qwen2.5-VL-7B-Instruct` when empty.
 * @param file                   Image/video the caption describes.
 * @param currentCaption         Caption text to refine.
 * @param refinementInstructions What the user wants changed.
 * @param videoFrameCount        Requested frames for video input; refinement uses at most 4.
 * @param signal                 Optional signal used to cancel the HTTP request.
 * @returns The trimmed refined caption, or `""` when the response carries no message content.
 * @throws If `baseUrl` is empty or the server answers with a non-2xx status.
 */
export const refineCaptionQwen = async (
  apiKey: string,
  baseUrl: string,
  model: string,
  file: File,
  currentCaption: string,
  refinementInstructions: string,
  videoFrameCount: number = 8,
  signal?: AbortSignal
): Promise<string> => {
  // Same guard as generateCaptionQwen: an empty base would otherwise resolve to
  // a relative "/chat/completions" on the app's own origin.
  if (!baseUrl) throw new Error("Local Endpoint URL is required for Qwen.");

  let endpoint = baseUrl;
  if (!endpoint.includes('/chat/completions')) endpoint = endpoint.replace(/\/+$/, '') + '/chat/completions';

  const prompt = `Refine the following caption based on the visual information and the instructions. Output ONLY the refined text.
CURRENT CAPTION: "${currentCaption}"
INSTRUCTIONS: "${refinementInstructions}"`;

  const contentParts: any[] = [{ type: "text", text: prompt }];
  if (file.type.startsWith('video/')) {
    // Refinement has always sampled 4 frames. Keep that as the ceiling while
    // honouring a smaller caller request; the parameter used to be ignored,
    // which is also an error under `noUnusedParameters`.
    const maxRefinementFrames = 4;
    const frames = await extractFramesFromVideo(file, Math.min(videoFrameCount, maxRefinementFrames));
    frames.forEach(frame => contentParts.push({ type: "image_url", image_url: { url: frame } }));
  } else {
    const base64Image = await fileToBase64(file);
    contentParts.push({ type: "image_url", image_url: { url: base64Image } });
  }

  const payload = {
    model: model || 'Qwen/Qwen2.5-VL-7B-Instruct',
    messages: [{ role: "user", content: contentParts }],
    max_tokens: 1000,
    temperature: 0.2
  };
  const headers: Record<string, string> = { "Content-Type": "application/json" };
  if (apiKey) headers["Authorization"] = `Bearer ${apiKey}`;

  const response = await fetch(endpoint, { method: "POST", headers, body: JSON.stringify(payload), signal });
  // Without this check an HTTP error either surfaced as a JSON parse failure or
  // silently produced an empty "refined" caption.
  if (!response.ok) throw new Error(`API Error: ${response.status}`);

  const data = await response.json();
  return data.choices?.[0]?.message?.content?.trim() || "";
};
179
+
180
/**
 * Asks an OpenAI-compatible vision model to rate a caption against its media file.
 *
 * @param apiKey          Bearer token; no `Authorization` header is sent when empty.
 * @param baseUrl         Endpoint base URL, or a full `/chat/completions` URL.
 * @param model           Model id; falls back to `Qwen/Qwen2.5-VL-7B-Instruct` when empty.
 * @param file            Image/video the caption describes.
 * @param caption         Caption text to evaluate.
 * @param videoFrameCount Requested frames for video input; the check uses at most 4.
 * @param signal          Optional signal used to cancel the HTTP request.
 * @returns The first integer found in the model's reply (the prompt asks for
 *          1-5), or 0 when the reply has no digits or the server answered with
 *          a non-2xx status.
 */
export const checkQualityQwen = async (
  apiKey: string,
  baseUrl: string,
  model: string,
  file: File,
  caption: string,
  videoFrameCount: number = 8,
  signal?: AbortSignal
): Promise<number> => {
  let endpoint = baseUrl;
  if (!endpoint.includes('/chat/completions')) endpoint = endpoint.replace(/\/+$/, '') + '/chat/completions';

  const prompt = `Evaluate the caption quality. Respond with ONLY an integer from 1 to 5.\nCaption: "${caption}"`;

  const contentParts: any[] = [{ type: "text", text: prompt }];
  if (file.type.startsWith('video/')) {
    // The check has always sampled 4 frames. Keep that as the ceiling while
    // honouring a smaller caller request; the parameter used to be ignored,
    // which is also an error under `noUnusedParameters`.
    const maxQualityFrames = 4;
    const frames = await extractFramesFromVideo(file, Math.min(videoFrameCount, maxQualityFrames));
    frames.forEach(frame => contentParts.push({ type: "image_url", image_url: { url: frame } }));
  } else {
    const base64Image = await fileToBase64(file);
    contentParts.push({ type: "image_url", image_url: { url: base64Image } });
  }

  const payload = {
    model: model || 'Qwen/Qwen2.5-VL-7B-Instruct',
    messages: [{ role: "user", content: contentParts }],
    max_tokens: 10,
    temperature: 0.1
  };
  const headers: Record<string, string> = { "Content-Type": "application/json" };
  if (apiKey) headers["Authorization"] = `Bearer ${apiKey}`;

  const response = await fetch(endpoint, { method: "POST", headers, body: JSON.stringify(payload), signal });
  if (!response.ok) {
    // An error page is not valid JSON; report "no score" (0) with a clear log
    // entry instead of failing later with an opaque parse error.
    console.error(`Quality check failed: API Error: ${response.status}`);
    return 0;
  }

  const data = await response.json();
  const text = data.choices?.[0]?.message?.content?.trim();
  // Tolerate replies such as "Score: 4" by taking the first run of digits.
  return parseInt(text?.match(/\d+/)?.[0] || '0', 10);
};
tsconfig.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2020",
4
+ "useDefineForClassFields": true,
5
+ "lib": ["ES2020", "DOM", "DOM.Iterable"],
6
+ "module": "ESNext",
7
+ "skipLibCheck": true,
8
+ "moduleResolution": "bundler",
9
+ "allowImportingTsExtensions": true,
10
+ "resolveJsonModule": true,
11
+ "isolatedModules": true,
12
+ "noEmit": true,
13
+ "jsx": "react-jsx",
14
+ "strict": true,
15
+ "noUnusedLocals": true,
16
+ "noUnusedParameters": true,
17
+ "noFallthroughCasesInSwitch": true,
18
+ "types": ["vite/client", "node"]
19
+ },
20
+ "include": ["**/*.ts", "**/*.tsx"]
21
+ }
types.ts ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
/**
 * States of a media item's caption job, stored in `MediaFile.status`.
 * Each member is a string enum value equal to its lower-case name.
 */
export enum GenerationStatus {
  /** No job in progress (presumably the initial state; confirm against the UI code). */
  IDLE = 'idle',
  /** A caption request is in flight (presumably; the transitions live outside this file). */
  GENERATING = 'generating',
  /** A quality check is running (presumably after generation; confirm against the caller). */
  CHECKING = 'checking',
  /** The job finished without error. */
  SUCCESS = 'success',
  /** The job failed; see `MediaFile.errorMessage`. */
  ERROR = 'error',
}
9
+
10
/**
 * One uploaded media item together with its caption and job state.
 */
export interface MediaFile {
  /** Identifier for this item (uniqueness scheme is decided by the caller). */
  id: string;
  /** The underlying browser `File`. */
  file: File;
  /** URL used to display the media (presumably an object URL; confirm where it is created and revoked). */
  previewUrl: string;
  /** Current caption text. */
  caption: string;
  /** State of the caption job. */
  status: GenerationStatus;
  /** Failure description (presumably set when `status` is `ERROR`). */
  errorMessage?: string;
  /** Whether the item is currently selected in the UI. */
  isSelected: boolean;
  /** Per-item instructions for the captioning prompt. */
  customInstructions: string;
  /** Caption quality score; the service prompts ask for 1-5 and return 0 when no score was obtained. */
  qualityScore?: number;
  // ComfyUI Preview fields
  /** URL of the preview image produced through ComfyUI. */
  comfyPreviewUrl?: string;
  /** State of the ComfyUI preview job. */
  comfyStatus?: 'idle' | 'generating' | 'success' | 'error';
  /** Failure description for the ComfyUI preview job. */
  comfyErrorMessage?: string;
}
vite.config.ts ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { defineConfig, loadEnv } from 'vite';
2
+ import react from '@vitejs/plugin-react';
3
+
4
/** ComfyUI address used when a bridge request names no (valid) target. */
const DEFAULT_COMFY_TARGET = 'http://localhost:8188';

/**
 * Picks the upstream ComfyUI base URL for a bridge request.
 *
 * The `x-bridge-target` header wins; GET requests that cannot carry custom
 * headers (e.g. `<img src>`) may use the `target_base` query parameter.
 * Anything that is not an absolute http(s) URL falls back to the default.
 */
const resolveBridgeTarget = (req: {
  headers: Record<string, string | string[] | undefined>;
  url?: string;
}): string => {
  const headerValue = req.headers['x-bridge-target'];
  const fromHeader = Array.isArray(headerValue) ? headerValue[0] : headerValue;
  const fromQuery = new URL(req.url ?? '/', 'http://localhost').searchParams.get('target_base');
  const candidate = fromHeader || fromQuery;
  if (!candidate) return DEFAULT_COMFY_TARGET;
  try {
    const { protocol } = new URL(candidate);
    return protocol === 'http:' || protocol === 'https:' ? candidate : DEFAULT_COMFY_TARGET;
  } catch {
    return DEFAULT_COMFY_TARGET;
  }
};

/**
 * Removes the routing-only `target_base` parameter from a request path while
 * leaving every other parameter byte-for-byte intact, wherever it appears in
 * the query string.
 */
const stripTargetBase = (path: string): string => {
  const queryStart = path.indexOf('?');
  if (queryStart === -1) return path;
  const pathname = path.slice(0, queryStart);
  const kept = path
    .slice(queryStart + 1)
    .split('&')
    .filter(pair => pair !== '' && !pair.startsWith('target_base='));
  return kept.length > 0 ? `${pathname}?${kept.join('&')}` : pathname;
};

/**
 * Builds the `/comfy-bridge` proxy entry shared by the dev and preview servers.
 *
 * Vite's proxy is a thin wrapper over http-proxy: it does not understand the
 * http-proxy-middleware options `router`, `onProxyReq` or `onProxyRes`, so the
 * previous configuration always forwarded to the fallback target and never
 * stripped any header. The same behaviour is wired up here through the
 * supported `configure` hook.
 *
 * NOTE(security): the target is chosen by the client, so anyone who can reach
 * this server can make it issue requests to arbitrary http(s) hosts. Restrict
 * the accepted targets before exposing this publicly.
 */
const createComfyBridgeProxy = (): import('vite').ProxyOptions => ({
  target: DEFAULT_COMFY_TARGET, // Fallback target
  changeOrigin: true,
  secure: false,
  ws: true,
  rewrite: (path: string) => path.replace(/^\/comfy-bridge/, ''),
  configure: (proxy) => {
    // http-proxy accepts per-request options, so wrap `web` to inject the
    // dynamically resolved target. WebSocket upgrades still use the fallback.
    // NOTE(review): relies on `proxy.web` being assignable on the instance;
    // verify against the http-proxy version bundled with the installed Vite.
    const forwardWeb = proxy.web.bind(proxy);
    proxy.web = (req, res, options, callback) => {
      const target = resolveBridgeTarget(req);
      if (req.url) req.url = stripTargetBase(req.url);
      return forwardWeb(req, res, { ...options, target }, callback);
    };

    proxy.on('proxyReq', (proxyReq) => {
      // CRITICAL: Strip security headers that cause 403 on ComfyUI
      proxyReq.removeHeader('origin');
      proxyReq.removeHeader('referer');
      proxyReq.removeHeader('x-bridge-target'); // Clean up internal header
    });

    proxy.on('proxyRes', (proxyRes) => {
      // Ensure CORS is handled by the proxy
      proxyRes.headers['access-control-allow-origin'] = '*';
    });
  },
});

export default defineConfig(({ mode }) => {
  // Load env file based on `mode` in the current working directory.
  const env = loadEnv(mode, '.', '');

  return {
    plugins: [react()],
    define: {
      // Expose the API_KEY from the environment to the client-side code.
      // NOTE(review): the repository's .env.local defines GEMINI_API_KEY, not
      // API_KEY, so this is undefined unless API_KEY is set elsewhere.
      'process.env.API_KEY': JSON.stringify(env.API_KEY),
      'process.env': {}
    },
    server: {
      port: 7860,
      host: true,
      proxy: {
        // Secure Bridge Proxy Implementation
        '/comfy-bridge': createComfyBridgeProxy()
      }
    },
    preview: {
      port: 7860,
      host: true,
      allowedHosts: true,
      proxy: {
        // Same bridge for preview mode (HF Spaces production)
        '/comfy-bridge': createComfyBridgeProxy()
      }
    }
  };
});