Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -65,7 +65,7 @@ def preprocess_text_with_nlp_llm():
|
|
| 65 |
separators=["\n[PAGE", "\n", " "],
|
| 66 |
)
|
| 67 |
chunks = splitter.split_text(text)
|
| 68 |
-
return jsonify({'chunks': chunks, 'preprocessed_data': preprocessed_data)
|
| 69 |
|
| 70 |
@app.route('/preprocess_text_with_nlp_pymupdf', methods=['POST'])
|
| 71 |
def preprocess_text_with_nlp_pymupdf():
|
|
@@ -104,13 +104,21 @@ def preprocess_text_with_nlp_pymupdf():
|
|
| 104 |
for chunk in doc.noun_chunks
|
| 105 |
]
|
| 106 |
|
| 107 |
-
return {
|
| 108 |
"tokens_and_lemmas": tokens_and_lemmas,
|
| 109 |
"entities": entities,
|
| 110 |
"noun_chunks": noun_chunks,
|
| 111 |
"text": text,
|
| 112 |
-
}
|
| 113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
|
| 115 |
if __name__ == '__main__':
|
| 116 |
app.run(host='0.0.0.0', port=7860)
|
|
|
|
| 65 |
separators=["\n[PAGE", "\n", " "],
|
| 66 |
)
|
| 67 |
chunks = splitter.split_text(text)
|
| 68 |
+
return jsonify({'chunks': chunks, 'preprocessed_data': preprocessed_data})
|
| 69 |
|
| 70 |
@app.route('/preprocess_text_with_nlp_pymupdf', methods=['POST'])
|
| 71 |
def preprocess_text_with_nlp_pymupdf():
|
|
|
|
| 104 |
for chunk in doc.noun_chunks
|
| 105 |
]
|
| 106 |
|
| 107 |
+
return jsonify({
|
| 108 |
"tokens_and_lemmas": tokens_and_lemmas,
|
| 109 |
"entities": entities,
|
| 110 |
"noun_chunks": noun_chunks,
|
| 111 |
"text": text,
|
| 112 |
+
})
|
| 113 |
+
|
| 114 |
+
@app.route('/recursive_character_text_splitter', methods=['POST'])
def recursive_character_text_splitter():
    """Split POSTed text into overlapping character chunks, returned as JSON.

    Reads the ``text`` form field (defaults to the empty string) and responds
    with ``{"chunks": [...]}``.
    """
    raw_text = request.form.get('text', '')
    # 512-char windows with 50 chars of overlap; split preferentially at
    # page markers, then newlines, then spaces.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=512,
        chunk_overlap=50,
        separators=["\n[PAGE", "\n", " "],
    )
    split_chunks = text_splitter.split_text(raw_text)
    return jsonify({"chunks": split_chunks})
|
| 122 |
|
| 123 |
if __name__ == '__main__':
    # Bind to all interfaces on port 7860 — NOTE(review): presumably the
    # hosting platform's expected port; confirm against deployment config.
    app.run(host='0.0.0.0', port=7860)
|