OCRonos-TextCorrect

Sleeping

Pclanglais committed on Aug 4, 2024

Commit

2814dfb

verified ·

1 Parent(s): 63e8ceb

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -116,7 +116,7 @@ def preprocess_text(text):
     text = re.sub(r'\s+', ' ', text)
     return text.strip()
-def split_text(text, max_tokens=400):
     encoded = tokenizer.encode(text)
     splits = []
     for i in range(0, len(encoded), max_tokens):
@@ -125,8 +125,8 @@ def split_text(text, max_tokens=400):
     return splits
 # Function to generate text using CTranslate2
-def ocr_correction(prompt, max_new_tokens=600):
-    splits = split_text(prompt, max_tokens=400)
     corrected_splits = []
     list_prompts = []

     text = re.sub(r'\s+', ' ', text)
     return text.strip()
+def split_text(text, max_tokens=500):
     encoded = tokenizer.encode(text)
     splits = []
     for i in range(0, len(encoded), max_tokens):
     return splits
 # Function to generate text using CTranslate2
+def ocr_correction(prompt, max_new_tokens=500):
+    splits = split_text(prompt, max_tokens=500)
     corrected_splits = []
     list_prompts = []