Spaces:
Running
Running
Sayı : 89600549-662-2906 Konu : Görev Listesi T.
Browse filesC.
ÇAljlşMA VE SOSYAL GÜVENLIK BAKANLIĞT lstanbul Rehberlik ve Teftiş Grup Başkanlığı 23/09/2025 Sayın Yasin ÖZDEMİR (Iş Müfettişi) istanbul Rehberlik ve Teftiş Grup Başkanlığının görev emri yazısı eki görev listesi aşağıda belirtilmiş olup görev evrakları ekte sunulmuştur. Daha iyi olsun; bu, analiz ettiği bir dosyanın içeriği. Her çeviride kullanıcının onayını aldıktan sonra öğrene öğrene gitsin; hatalı olabilecek yerlerde en son kullanıcıdan onay alarak kalıpları ve karakterleri öğrensin, daha sonra hata yapmadan ilerlesin.
script.js
CHANGED
|
@@ -332,10 +332,18 @@ async function convertPDFToImages(pdfData) {
|
|
| 332 |
resolve(images);
|
| 333 |
});
|
| 334 |
}
|
| 335 |
-
|
| 336 |
async function extractTextFromImage(file) {
|
| 337 |
-
return new Promise((resolve, reject) => {
|
| 338 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 339 |
file :
|
| 340 |
file;
|
| 341 |
|
|
@@ -345,9 +353,11 @@ return new Promise((resolve, reject) => {
|
|
| 345 |
{
|
| 346 |
logger: m => console.log(m),
|
| 347 |
preserve_interword_spaces: true,
|
| 348 |
-
tessedit_pageseg_mode: 6, // Assume a single uniform block of text
|
| 349 |
tessedit_char_whitelist: 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,!?-(){}[]/\\\'" ğüşıöçĞÜŞİÖÇ', // Added Turkish chars
|
| 350 |
-
|
|
|
|
|
|
|
| 351 |
load_system_dawg: 1,
|
| 352 |
load_freq_dawg: 1,
|
| 353 |
user_words_suffix: 'tur',
|
|
@@ -363,8 +373,16 @@ return new Promise((resolve, reject) => {
|
|
| 363 |
}
|
| 364 |
}).catch(reject);
|
| 365 |
});
|
| 366 |
-
|
| 367 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 368 |
const parser = new DOMParser();
|
| 369 |
const doc = parser.parseFromString(hocr, 'text/html');
|
| 370 |
const paragraphs = doc.querySelectorAll('.ocr_par');
|
|
@@ -393,10 +411,28 @@ return new Promise((resolve, reject) => {
|
|
| 393 |
lineText += `${wordText} `;
|
| 394 |
}
|
| 395 |
});
|
| 396 |
-
|
| 397 |
// Better line spacing for Turkish text
|
| 398 |
formattedText += lineText.trim() + '\n\n';
|
| 399 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 400 |
|
| 401 |
formattedText += '\n';
|
| 402 |
});
|
|
@@ -404,10 +440,29 @@ return new Promise((resolve, reject) => {
|
|
| 404 |
return formattedText;
|
| 405 |
}
|
| 406 |
}
|
| 407 |
-
|
| 408 |
// Check if this was an OCR fallback result
|
| 409 |
const isOCRResult = result.content.includes('OCR processing attempted') ||
|
| 410 |
result.content.includes('Warning: No extractable text found');
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 411 |
const resultCard = document.createElement('div');
|
| 412 |
resultCard.className = 'bg-gray-50 rounded-lg p-4 shadow-sm';
|
| 413 |
|
|
|
|
| 332 |
resolve(images);
|
| 333 |
});
|
| 334 |
}
|
|
|
|
| 335 |
async function extractTextFromImage(file) {
|
| 336 |
+
return new Promise((resolve, reject) => {
|
| 337 |
+
// Apply learned corrections before OCR if any exist
|
| 338 |
+
let trainedWords = {};
|
| 339 |
+
if (window.ocrLearningDict) {
|
| 340 |
+
for (const [word, data] of Object.entries(window.ocrLearningDict)) {
|
| 341 |
+
if (data.confirmedCorrect && data.confirmedCorrect !== word) {
|
| 342 |
+
trainedWords[word] = data.confirmedCorrect;
|
| 343 |
+
}
|
| 344 |
+
}
|
| 345 |
+
}
|
| 346 |
+
const imageElement = file instanceof HTMLCanvasElement ?
|
| 347 |
file :
|
| 348 |
file;
|
| 349 |
|
|
|
|
| 353 |
{
|
| 354 |
logger: m => console.log(m),
|
| 355 |
preserve_interword_spaces: true,
|
| 356 |
+
tessedit_pageseg_mode: 6, // Assume a single uniform block of text
|
| 357 |
tessedit_char_whitelist: 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,!?-(){}[]/\\\'" ğüşıöçĞÜŞİÖÇ', // Added Turkish chars
|
| 358 |
+
user_defined_words: Object.keys(trainedWords).join(' '),
|
| 359 |
+
user_words: Object.values(trainedWords).join(' '),
|
| 360 |
+
tessedit_create_hocr: 1, // Include formatting info
|
| 361 |
load_system_dawg: 1,
|
| 362 |
load_freq_dawg: 1,
|
| 363 |
user_words_suffix: 'tur',
|
|
|
|
| 373 |
}
|
| 374 |
}).catch(reject);
|
| 375 |
});
|
| 376 |
+
function processFormattedOCR(hocr) {
|
| 377 |
+
// Apply learned corrections
|
| 378 |
+
if (window.ocrLearningDict) {
|
| 379 |
+
for (const [word, data] of Object.entries(window.ocrLearningDict)) {
|
| 380 |
+
if (data.confirmedCorrect && data.confirmedCorrect !== word) {
|
| 381 |
+
hocr = hocr.replace(new RegExp(word, 'g'), data.confirmedCorrect);
|
| 382 |
+
}
|
| 383 |
+
}
|
| 384 |
+
}
|
| 385 |
+
// Parse hOCR output to preserve formatting and layout
|
| 386 |
const parser = new DOMParser();
|
| 387 |
const doc = parser.parseFromString(hocr, 'text/html');
|
| 388 |
const paragraphs = doc.querySelectorAll('.ocr_par');
|
|
|
|
| 411 |
lineText += `${wordText} `;
|
| 412 |
}
|
| 413 |
});
|
|
|
|
| 414 |
// Better line spacing for Turkish text
|
| 415 |
formattedText += lineText.trim() + '\n\n';
|
| 416 |
+
|
| 417 |
+
// Store problematic words for learning
|
| 418 |
+
words.forEach(word => {
|
| 419 |
+
const wordConfidence = parseFloat(word.getAttribute('title')
|
| 420 |
+
.match(/x_wconf (\d+)/)[1]);
|
| 421 |
+
|
| 422 |
+
if (wordConfidence < 85) {
|
| 423 |
+
const originalWord = word.textContent || '';
|
| 424 |
+
if (!window.ocrLearningDict) window.ocrLearningDict = {};
|
| 425 |
+
if (!window.ocrLearningDict[originalWord]) {
|
| 426 |
+
window.ocrLearningDict[originalWord] = {
|
| 427 |
+
occurrences: 0,
|
| 428 |
+
confirmedCorrect: null,
|
| 429 |
+
suggestTime: null
|
| 430 |
+
};
|
| 431 |
+
}
|
| 432 |
+
window.ocrLearningDict[originalWord].occurrences++;
|
| 433 |
+
}
|
| 434 |
+
});
|
| 435 |
+
});
|
| 436 |
|
| 437 |
formattedText += '\n';
|
| 438 |
});
|
|
|
|
| 440 |
return formattedText;
|
| 441 |
}
|
| 442 |
}
|
| 443 |
+
function displayResult(result) {
|
| 444 |
// Check if this was an OCR fallback result
|
| 445 |
const isOCRResult = result.content.includes('OCR processing attempted') ||
|
| 446 |
result.content.includes('Warning: No extractable text found');
|
| 447 |
+
|
| 448 |
+
// Scan for potential errors and ask user confirmation
|
| 449 |
+
if (window.ocrLearningDict) {
|
| 450 |
+
for (const [word, data] of Object.entries(window.ocrLearningDict)) {
|
| 451 |
+
if (data.confirmedCorrect === null && result.content.includes(word)) {
|
| 452 |
+
data.suggestTime = new Date().toISOString();
|
| 453 |
+
if (confirm(`Is "${word}" correctly recognized? If not, please type the correct version.`)) {
|
| 454 |
+
data.confirmedCorrect = word;
|
| 455 |
+
} else {
|
| 456 |
+
const corrected = prompt(`Please enter correct version for "${word}":`, word);
|
| 457 |
+
if (corrected) {
|
| 458 |
+
data.confirmedCorrect = corrected;
|
| 459 |
+
// Replace in current result
|
| 460 |
+
result.content = result.content.replace(new RegExp(word, 'g'), corrected);
|
| 461 |
+
}
|
| 462 |
+
}
|
| 463 |
+
}
|
| 464 |
+
}
|
| 465 |
+
}
|
| 466 |
const resultCard = document.createElement('div');
|
| 467 |
resultCard.className = 'bg-gray-50 rounded-lg p-4 shadow-sm';
|
| 468 |
|