Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -65,7 +65,7 @@ def preprocess_text_with_nlp_llm():
|
|
| 65 |
separators=["\n[PAGE", "\n", " "],
|
| 66 |
)
|
| 67 |
chunks = splitter.split_text(text)
|
| 68 |
-
return jsonify({'chunks': chunks, 'preprocessed_data': preprocessed_data)
|
| 69 |
|
| 70 |
@app.route('/preprocess_text_with_nlp_pymupdf', methods=['POST'])
|
| 71 |
def preprocess_text_with_nlp_pymupdf():
|
|
@@ -104,13 +104,21 @@ def preprocess_text_with_nlp_pymupdf():
|
|
| 104 |
for chunk in doc.noun_chunks
|
| 105 |
]
|
| 106 |
|
| 107 |
-
return {
|
| 108 |
"tokens_and_lemmas": tokens_and_lemmas,
|
| 109 |
"entities": entities,
|
| 110 |
"noun_chunks": noun_chunks,
|
| 111 |
"text": text,
|
| 112 |
-
}
|
| 113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
|
| 115 |
if __name__ == '__main__':
|
| 116 |
app.run(host='0.0.0.0', port=7860)
|
|
|
|
| 65 |
separators=["\n[PAGE", "\n", " "],
|
| 66 |
)
|
| 67 |
chunks = splitter.split_text(text)
|
| 68 |
+
return jsonify({'chunks': chunks, 'preprocessed_data': preprocessed_data})
|
| 69 |
|
| 70 |
@app.route('/preprocess_text_with_nlp_pymupdf', methods=['POST'])
|
| 71 |
def preprocess_text_with_nlp_pymupdf():
|
|
|
|
| 104 |
for chunk in doc.noun_chunks
|
| 105 |
]
|
| 106 |
|
| 107 |
+
return jsonify({
|
| 108 |
"tokens_and_lemmas": tokens_and_lemmas,
|
| 109 |
"entities": entities,
|
| 110 |
"noun_chunks": noun_chunks,
|
| 111 |
"text": text,
|
| 112 |
+
})
|
| 113 |
+
|
| 114 |
+
@app.route('/recursive_character_text_splitter', methods=['POST'])
def recursive_character_text_splitter():
    """Split POSTed text into overlapping character chunks, returned as JSON.

    Reads the ``text`` form field (defaults to the empty string) and responds
    with ``{"chunks": [...]}``.
    """
    raw_text = request.form.get('text', '')
    # 512-char windows with 50 chars of overlap; split preferentially at
    # page markers, then newlines, then spaces.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=512,
        chunk_overlap=50,
        separators=["\n[PAGE", "\n", " "],
    )
    split_chunks = text_splitter.split_text(raw_text)
    return jsonify({"chunks": split_chunks})
|
| 122 |
|
| 123 |
if __name__ == '__main__':
    # Bind to all interfaces on port 7860 — NOTE(review): presumably the
    # hosting platform's expected port; confirm against deployment config.
    app.run(host='0.0.0.0', port=7860)
|