Update app.py
Browse files
app.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from transformers import pipeline, AutoTokenizer
|
| 3 |
from sentence_transformers import SentenceTransformer, util
|
| 4 |
-
import math
|
| 5 |
|
| 6 |
# Translation models
|
| 7 |
translation_models = {
|
|
@@ -45,10 +44,13 @@ def split_text(text, max_tokens=1024):
|
|
| 45 |
total_tokens = len(input_ids)
|
| 46 |
|
| 47 |
chunks = []
|
| 48 |
-
|
| 49 |
-
|
|
|
|
|
|
|
| 50 |
chunk_text = tokenizer.decode(chunk_ids, skip_special_tokens=True)
|
| 51 |
chunks.append(chunk_text)
|
|
|
|
| 52 |
|
| 53 |
return chunks
|
| 54 |
|
|
@@ -92,3 +94,4 @@ iface.launch()
|
|
| 92 |
|
| 93 |
|
| 94 |
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from transformers import pipeline, AutoTokenizer
|
| 3 |
from sentence_transformers import SentenceTransformer, util
|
|
|
|
| 4 |
|
| 5 |
# Translation models
|
| 6 |
translation_models = {
|
|
|
|
| 44 |
total_tokens = len(input_ids)
|
| 45 |
|
| 46 |
chunks = []
|
| 47 |
+
start = 0
|
| 48 |
+
while start < total_tokens:
|
| 49 |
+
end = min(start + max_tokens, total_tokens)
|
| 50 |
+
chunk_ids = input_ids[start:end]
|
| 51 |
chunk_text = tokenizer.decode(chunk_ids, skip_special_tokens=True)
|
| 52 |
chunks.append(chunk_text)
|
| 53 |
+
start = end
|
| 54 |
|
| 55 |
return chunks
|
| 56 |
|
|
|
|
| 94 |
|
| 95 |
|
| 96 |
|
| 97 |
+
|