amougou-fortiss committed on
Commit
78b7166
·
verified ·
1 Parent(s): 44c4e3b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -4
app.py CHANGED
@@ -65,7 +65,7 @@ def preprocess_text_with_nlp_llm():
65
  separators=["\n[PAGE", "\n", " "],
66
  )
67
  chunks = splitter.split_text(text)
68
- return jsonify({'chunks': chunks, 'preprocessed_data': preprocessed_data)
69
 
70
  @app.route('/preprocess_text_with_nlp_pymupdf', methods=['POST'])
71
  def preprocess_text_with_nlp_pymupdf():
@@ -104,13 +104,21 @@ def preprocess_text_with_nlp_pymupdf():
104
  for chunk in doc.noun_chunks
105
  ]
106
 
107
- return {
108
  "tokens_and_lemmas": tokens_and_lemmas,
109
  "entities": entities,
110
  "noun_chunks": noun_chunks,
111
  "text": text,
112
- }
113
-
 
 
 
 
 
 
 
 
114
 
115
  if __name__ == '__main__':
116
  app.run(host='0.0.0.0', port=7860)
 
65
  separators=["\n[PAGE", "\n", " "],
66
  )
67
  chunks = splitter.split_text(text)
68
+ return jsonify({'chunks': chunks, 'preprocessed_data': preprocessed_data})
69
 
70
  @app.route('/preprocess_text_with_nlp_pymupdf', methods=['POST'])
71
  def preprocess_text_with_nlp_pymupdf():
 
104
  for chunk in doc.noun_chunks
105
  ]
106
 
107
+ return jsonify({
108
  "tokens_and_lemmas": tokens_and_lemmas,
109
  "entities": entities,
110
  "noun_chunks": noun_chunks,
111
  "text": text,
112
+ })
113
+
114
+ @app.route('/recursive_character_text_splitter', methods=['POST'])
115
+ def recursive_character_text_splitter():
116
+ text = request.form.get('text', '')
117
+ splitter = RecursiveCharacterTextSplitter(
118
+ chunk_size=512, chunk_overlap=50, separators=["\n[PAGE", "\n", " "]
119
+ )
120
+ chunks = splitter.split_text(text)
121
+ return jsonify({"chunks": chunks})
122
 
123
  if __name__ == '__main__':
124
  app.run(host='0.0.0.0', port=7860)