Spaces:
Paused
Add GROBID processing
Browse files
app.py
CHANGED
|
@@ -3,6 +3,7 @@ import pandas as pd
|
|
| 3 |
import numpy as np
|
| 4 |
import os
|
| 5 |
from grobidmonkey import reader
|
|
|
|
| 6 |
|
| 7 |
def save_uploaded_file(uploaded_file):
|
| 8 |
file_path = os.path.join("uploads", uploaded_file.name)
|
|
@@ -20,6 +21,16 @@ if uploaded_file is not None:
|
|
| 20 |
bytes_data = uploaded_file.getvalue()
|
| 21 |
st.write(len(bytes_data), "bytes")
|
| 22 |
saved_file_path = save_uploaded_file(uploaded_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
monkeyReader = reader.MonkeyReader('x2d')
|
| 24 |
outline = monkeyReader.readOutline(saved_file_path)
|
| 25 |
|
|
|
|
| 3 |
import numpy as np
|
| 4 |
import os
|
| 5 |
from grobidmonkey import reader
|
| 6 |
+
from grobid_client.grobid_client import GrobidClient
|
| 7 |
|
| 8 |
def save_uploaded_file(uploaded_file):
|
| 9 |
file_path = os.path.join("uploads", uploaded_file.name)
|
|
|
|
| 21 |
bytes_data = uploaded_file.getvalue()
|
| 22 |
st.write(len(bytes_data), "bytes")
|
| 23 |
saved_file_path = save_uploaded_file(uploaded_file)
|
| 24 |
+
|
| 25 |
+
os.makedirs("grobidoutputs", exist_ok=True)
|
| 26 |
+
client = GrobidClient(config_path="./config.json")
|
| 27 |
+
client.process("processFulltextDocument",
|
| 28 |
+
"./uploads/",
|
| 29 |
+
output="./grobidoutputs/",
|
| 30 |
+
n=20)
|
| 31 |
+
|
| 32 |
+
output_file_path = os.path.join("grobidoutputs", os.path.splitext(uploaded_file.name)[0] + "tei.xml")
|
| 33 |
+
|
| 34 |
monkeyReader = reader.MonkeyReader('x2d')
|
| 35 |
outline = monkeyReader.readOutline(saved_file_path)
|
| 36 |
|