ASDAD34 committed on
Commit
262d37a
·
verified ·
1 Parent(s): bfa635c

Sayı : 89600549-662-2906 Konu : Görev Listesi T.

Browse files

C.

ÇALIŞMA VE SOSYAL GÜVENLİK BAKANLIĞI İstanbul Rehberlik ve Teftiş Grup Başkanlığı 23/09/2025 Sayın Yasin ÖZDEMİR (İş Müfettişi) İstanbul Rehberlik ve Teftiş Grup Başkanlığının görev emri yazısı eki görev listesi aşağıda belirtilmiş olup görev evrakları ekte sunulmuştur. Daha iyi olsun: bu, analiz ettiği bir dosya içeriği; her çeviride kullanıcının onayını aldıktan sonra öğrene öğrene gitsin, hatalı olabilecek yerlerde en son kullanıcının onayı ile kalıpları ve karakterleri öğrensin, daha sonra hata yapmadan ilerlesin.

Files changed (1) hide show
  1. script.js +65 -10
script.js CHANGED
@@ -332,10 +332,18 @@ async function convertPDFToImages(pdfData) {
332
  resolve(images);
333
  });
334
  }
335
-
336
  async function extractTextFromImage(file) {
337
- return new Promise((resolve, reject) => {
338
- const imageElement = file instanceof HTMLCanvasElement ?
 
 
 
 
 
 
 
 
 
339
  file :
340
  file;
341
 
@@ -345,9 +353,11 @@ return new Promise((resolve, reject) => {
345
  {
346
  logger: m => console.log(m),
347
  preserve_interword_spaces: true,
348
- tessedit_pageseg_mode: 6, // Assume a single uniform block of text
349
  tessedit_char_whitelist: 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,!?-(){}[]/\\\'" ğüşıöçĞÜŞİÖÇ', // Added Turkish chars
350
- tessedit_create_hocr: 1, // Include formatting info
 
 
351
  load_system_dawg: 1,
352
  load_freq_dawg: 1,
353
  user_words_suffix: 'tur',
@@ -363,8 +373,16 @@ return new Promise((resolve, reject) => {
363
  }
364
  }).catch(reject);
365
  });
366
- function processFormattedOCR(hocr) {
367
- // Parse hOCR output to preserve formatting and layout
 
 
 
 
 
 
 
 
368
  const parser = new DOMParser();
369
  const doc = parser.parseFromString(hocr, 'text/html');
370
  const paragraphs = doc.querySelectorAll('.ocr_par');
@@ -393,10 +411,28 @@ return new Promise((resolve, reject) => {
393
  lineText += `${wordText} `;
394
  }
395
  });
396
-
397
  // Better line spacing for Turkish text
398
  formattedText += lineText.trim() + '\n\n';
399
- });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
 
401
  formattedText += '\n';
402
  });
@@ -404,10 +440,29 @@ return new Promise((resolve, reject) => {
404
  return formattedText;
405
  }
406
  }
407
- function displayResult(result) {
408
  // Check if this was an OCR fallback result
409
  const isOCRResult = result.content.includes('OCR processing attempted') ||
410
  result.content.includes('Warning: No extractable text found');
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
411
  const resultCard = document.createElement('div');
412
  resultCard.className = 'bg-gray-50 rounded-lg p-4 shadow-sm';
413
 
 
332
  resolve(images);
333
  });
334
  }
 
335
  async function extractTextFromImage(file) {
336
+ return new Promise((resolve, reject) => {
337
+ // Apply learned corrections before OCR if any exist
338
+ let trainedWords = {};
339
+ if (window.ocrLearningDict) {
340
+ for (const [word, data] of Object.entries(window.ocrLearningDict)) {
341
+ if (data.confirmedCorrect && data.confirmedCorrect !== word) {
342
+ trainedWords[word] = data.confirmedCorrect;
343
+ }
344
+ }
345
+ }
346
+ const imageElement = file instanceof HTMLCanvasElement ?
347
  file :
348
  file;
349
 
 
353
  {
354
  logger: m => console.log(m),
355
  preserve_interword_spaces: true,
356
+ tessedit_pageseg_mode: 6, // Assume a single uniform block of text
357
  tessedit_char_whitelist: 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,!?-(){}[]/\\\'" ğüşıöçĞÜŞİÖÇ', // Added Turkish chars
358
+ user_defined_words: Object.keys(trainedWords).join(' '),
359
+ user_words: Object.values(trainedWords).join(' '),
360
+ tessedit_create_hocr: 1, // Include formatting info
361
  load_system_dawg: 1,
362
  load_freq_dawg: 1,
363
  user_words_suffix: 'tur',
 
373
  }
374
  }).catch(reject);
375
  });
376
+ function processFormattedOCR(hocr) {
377
+ // Apply learned corrections
378
+ if (window.ocrLearningDict) {
379
+ for (const [word, data] of Object.entries(window.ocrLearningDict)) {
380
+ if (data.confirmedCorrect && data.confirmedCorrect !== word) {
381
+ hocr = hocr.replace(new RegExp(word, 'g'), data.confirmedCorrect);
382
+ }
383
+ }
384
+ }
385
+ // Parse hOCR output to preserve formatting and layout
386
  const parser = new DOMParser();
387
  const doc = parser.parseFromString(hocr, 'text/html');
388
  const paragraphs = doc.querySelectorAll('.ocr_par');
 
411
  lineText += `${wordText} `;
412
  }
413
  });
 
414
  // Better line spacing for Turkish text
415
  formattedText += lineText.trim() + '\n\n';
416
+
417
+ // Store problematic words for learning
418
+ words.forEach(word => {
419
+ const wordConfidence = parseFloat(word.getAttribute('title')
420
+ .match(/x_wconf (\d+)/)[1]);
421
+
422
+ if (wordConfidence < 85) {
423
+ const originalWord = word.textContent || '';
424
+ if (!window.ocrLearningDict) window.ocrLearningDict = {};
425
+ if (!window.ocrLearningDict[originalWord]) {
426
+ window.ocrLearningDict[originalWord] = {
427
+ occurrences: 0,
428
+ confirmedCorrect: null,
429
+ suggestTime: null
430
+ };
431
+ }
432
+ window.ocrLearningDict[originalWord].occurrences++;
433
+ }
434
+ });
435
+ });
436
 
437
  formattedText += '\n';
438
  });
 
440
  return formattedText;
441
  }
442
  }
443
+ function displayResult(result) {
444
  // Check if this was an OCR fallback result
445
  const isOCRResult = result.content.includes('OCR processing attempted') ||
446
  result.content.includes('Warning: No extractable text found');
447
+
448
+ // Scan for potential errors and ask user confirmation
449
+ if (window.ocrLearningDict) {
450
+ for (const [word, data] of Object.entries(window.ocrLearningDict)) {
451
+ if (data.confirmedCorrect === null && result.content.includes(word)) {
452
+ data.suggestTime = new Date().toISOString();
453
+ if (confirm(`Is "${word}" correctly recognized? If not, please type the correct version.`)) {
454
+ data.confirmedCorrect = word;
455
+ } else {
456
+ const corrected = prompt(`Please enter correct version for "${word}":`, word);
457
+ if (corrected) {
458
+ data.confirmedCorrect = corrected;
459
+ // Replace in current result
460
+ result.content = result.content.replace(new RegExp(word, 'g'), corrected);
461
+ }
462
+ }
463
+ }
464
+ }
465
+ }
466
  const resultCard = document.createElement('div');
467
  resultCard.className = 'bg-gray-50 rounded-lg p-4 shadow-sm';
468