Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -116,7 +116,7 @@ def preprocess_text(text):
|
|
| 116 |
text = re.sub(r'\s+', ' ', text)
|
| 117 |
return text.strip()
|
| 118 |
|
| 119 |
-
def split_text(text, max_tokens=400):
|
| 120 |
encoded = tokenizer.encode(text)
|
| 121 |
splits = []
|
| 122 |
for i in range(0, len(encoded), max_tokens):
|
|
@@ -125,8 +125,8 @@ def split_text(text, max_tokens=400):
|
|
| 125 |
return splits
|
| 126 |
|
| 127 |
# Function to generate text using CTranslate2
|
| 128 |
-
def ocr_correction(prompt, max_new_tokens=
|
| 129 |
-
splits = split_text(prompt, max_tokens=
|
| 130 |
corrected_splits = []
|
| 131 |
|
| 132 |
list_prompts = []
|
|
|
|
| 116 |
text = re.sub(r'\s+', ' ', text)
|
| 117 |
return text.strip()
|
| 118 |
|
| 119 |
+
def split_text(text, max_tokens=500):
|
| 120 |
encoded = tokenizer.encode(text)
|
| 121 |
splits = []
|
| 122 |
for i in range(0, len(encoded), max_tokens):
|
|
|
|
| 125 |
return splits
|
| 126 |
|
| 127 |
# Function to generate text using CTranslate2
|
| 128 |
+
def ocr_correction(prompt, max_new_tokens=500):
|
| 129 |
+
splits = split_text(prompt, max_tokens=500)
|
| 130 |
corrected_splits = []
|
| 131 |
|
| 132 |
list_prompts = []
|