Leonardo committed
Commit fe63cd7 (verified)
1 Parent(s): 8520a66

Update scripts/text_cleaner_tool.py

Files changed (1)
  1. scripts/text_cleaner_tool.py +20 -1
scripts/text_cleaner_tool.py CHANGED
@@ -11,7 +11,7 @@ class TextCleanerTool(Tool):
     name = "clean_text"
     description = (
         "Cleans and normalizes text using the cleantext library. "
-        "Example usage: clean_text(text='Your text here', options={'lower': True, 'no_urls': True})"
+        "Transforms messy user-generated content into normalized text."
     )
     inputs = {
         "text": {"type": "string", "description": "The input text to clean"},
@@ -33,6 +33,25 @@ class TextCleanerTool(Tool):
         """
         Clean text using the cleantext library with flexible options.
 
+        User-generated content on the Web and in social media is often dirty. Preprocess your scraped data with `clean-text` to create a normalized text representation. For instance, turn this corrupted input:
+
+        ```
+        A bunch of \\u2018new\\u2019 references, including [Moana](https://en.wikipedia.org/wiki/Moana_%282016_film%29).
+
+
+        »Yóù àré rïght <3!«
+        ```
+
+        into this clean output:
+
+        ```
+        A bunch of 'new' references, including [moana](<URL>).
+
+        "you are right <3!"
+        ```
+
+        `clean-text` uses ftfy, unidecode and numerous hand-crafted rules, i.e., RegEx.
+
         Example API:
         clean("some input",
             fix_unicode=True,            # fix various unicode errors
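For readers without the `clean-text` package installed, here is a minimal stdlib-only sketch of the kind of normalization it performs. The helper name `simple_clean` and its options are hypothetical illustrations, not part of the committed tool, and this covers only a fraction of what `clean-text` (via ftfy and unidecode) actually does:

```python
import re
import unicodedata

def simple_clean(text: str, lower: bool = True, no_urls: bool = True) -> str:
    """Toy stand-in for cleantext.clean(): unicode normalization,
    optional lowercasing, and URL replacement with a <URL> token."""
    # NFKC folds compatibility characters, e.g. a non-breaking space
    # becomes a plain space.
    text = unicodedata.normalize("NFKC", text)
    if lower:
        text = text.lower()
    if no_urls:
        # Replace URLs after lowercasing so the placeholder keeps its case.
        text = re.sub(r"https?://\S+", "<URL>", text)
    # Collapse runs of spaces/tabs left behind by the substitutions.
    return re.sub(r"[ \t]+", " ", text).strip()

print(simple_clean("Visit https://example.com NOW"))  # visit <URL> now
```

The real library exposes many more switches (`fix_unicode`, `to_ascii`, `no_emails`, `no_punct`, etc.), but the pipeline shape — normalize, case-fold, substitute, collapse whitespace — is the same idea the docstring's before/after example demonstrates.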