Spaces:
Runtime error
Runtime error
Commit
·
c5be12e
1
Parent(s):
371967d
Update app.py
Browse files
app.py
CHANGED
|
@@ -29,18 +29,40 @@ def predict(payload):
|
|
| 29 |
|
| 30 |
recorte_general = ""
|
| 31 |
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
recorte_final = ""
|
| 37 |
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
return recorte_final
|
| 46 |
|
|
|
|
| 29 |
|
| 30 |
recorte_general = ""
|
| 31 |
|
| 32 |
+
# Crear chunks
|
| 33 |
+
chunks = cortar_en_bloques(ocr_text, 150)
|
| 34 |
+
first = -1
|
| 35 |
+
margin = int(len(chunks) * 0.25)
|
| 36 |
+
chunks_removable = chunks[:margin] + chunks[-margin:]
|
| 37 |
+
|
| 38 |
+
for i in range(len(chunks)):
|
| 39 |
+
print('Recortando -', round((i/len(chunks))*100), '%')
|
| 40 |
+
if chunks[i] not in chunks_removable or trim_model.predict([chunks[i]]).item() == 1:
|
| 41 |
+
if first == -1:
|
| 42 |
+
first = i
|
| 43 |
+
recorte_general += chunks[i] + " "
|
| 44 |
+
|
| 45 |
+
if first > 0:
|
| 46 |
+
recorte_general = chunks[first-1] + recorte_general
|
| 47 |
+
print(100, '%')
|
| 48 |
|
| 49 |
recorte_final = ""
|
| 50 |
|
| 51 |
+
# Definir tamaño de fragmentos de texto
|
| 52 |
+
# text_splitter2 = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=0, length_function=len)
|
| 53 |
+
# Crear chunks
|
| 54 |
+
chunks2 = cortar_en_bloques(recorte_general, 80)
|
| 55 |
+
margin_s = int(len(chunks2) * 0.1)
|
| 56 |
+
margin_e = int(len(chunks2) * 0.1)
|
| 57 |
+
# if margin_s > 1:
|
| 58 |
+
chunks_removable2 = chunks2[:margin_s] + chunks2[-margin_e:]
|
| 59 |
+
# chunks_removable2 = chunks2[-margin_e:]
|
| 60 |
+
|
| 61 |
+
for i in range(len(chunks2)):
|
| 62 |
+
print('Recortando -', round((i/len(chunks2))*100), '%')
|
| 63 |
+
if chunks2[i] not in chunks_removable2 or trim_model.predict([chunks2[i]]).item() == 1:
|
| 64 |
+
recorte_final += chunks2[i] + " "
|
| 65 |
+
print(100, '%')
|
| 66 |
|
| 67 |
return recorte_final
|
| 68 |
|