Spaces:
Paused
Paused
Rafael Uzarowski committed on
fix: deps for document_query, formatting of knowledge_tool results
Browse files
docker/base/fs/ins/install_base_packages.sh
CHANGED
|
@@ -4,7 +4,7 @@ set -e
|
|
| 4 |
echo "====================BASE PACKAGES START===================="
|
| 5 |
|
| 6 |
apt-get install -y --no-install-recommends \
|
| 7 |
-
nodejs npm openssh-server sudo curl wget git ffmpeg supervisor cron
|
| 8 |
|
| 9 |
echo "====================BASE PACKAGES NPM===================="
|
| 10 |
|
|
|
|
| 4 |
echo "====================BASE PACKAGES START===================="
|
| 5 |
|
| 6 |
apt-get install -y --no-install-recommends \
|
| 7 |
+
nodejs npm openssh-server sudo curl wget git ffmpeg supervisor cron tesseract-ocr-all poppler-utils
|
| 8 |
|
| 9 |
echo "====================BASE PACKAGES NPM===================="
|
| 10 |
|
python/helpers/document_query.py
CHANGED
|
@@ -592,8 +592,8 @@ class DocumentQueryHelper:
|
|
| 592 |
mimetype = response.headers["content-type"]
|
| 593 |
if "content-length" in response.headers:
|
| 594 |
content_length = float(response.headers["content-length"]) / 1024 / 1024 # MB
|
| 595 |
-
if content_length >
|
| 596 |
-
raise ValueError(f"Document content length exceeds max.
|
| 597 |
if mimetype and '; charset=' in mimetype:
|
| 598 |
mimetype = mimetype.split('; charset=')[0]
|
| 599 |
|
|
|
|
| 592 |
mimetype = response.headers["content-type"]
|
| 593 |
if "content-length" in response.headers:
|
| 594 |
content_length = float(response.headers["content-length"]) / 1024 / 1024 # MB
|
| 595 |
+
if content_length > 50.0:
|
| 596 |
+
raise ValueError(f"Document content length exceeds max. 50MB: {content_length} MB ({document_uri})")
|
| 597 |
if mimetype and '; charset=' in mimetype:
|
| 598 |
mimetype = mimetype.split('; charset=')[0]
|
| 599 |
|
python/tools/knowledge_tool.py
CHANGED
|
@@ -118,17 +118,17 @@ class Knowledge(Tool):
|
|
| 118 |
if "qa" in item:
|
| 119 |
outputs.append(
|
| 120 |
f"## Next Result\n"
|
| 121 |
-
f"Title: {item['title'].strip()}\n"
|
| 122 |
-
f"URL: {item['url'].strip()}\n"
|
| 123 |
-
f"Search Engine Summary:
|
| 124 |
-
f"Query Result:
|
| 125 |
)
|
| 126 |
else:
|
| 127 |
outputs.append(
|
| 128 |
f"## Next Result\n"
|
| 129 |
-
f"Title: {item['title'].strip()}\n"
|
| 130 |
-
f"URL: {item['url'].strip()}\n"
|
| 131 |
-
f"Search Engine Summary:
|
| 132 |
)
|
| 133 |
|
| 134 |
return "\n\n".join(outputs[:SEARCH_ENGINE_RESULTS]).strip()
|
|
|
|
| 118 |
if "qa" in item:
|
| 119 |
outputs.append(
|
| 120 |
f"## Next Result\n"
|
| 121 |
+
f"*Title*: {item['title'].strip()}\n"
|
| 122 |
+
f"*URL*: {item['url'].strip()}\n"
|
| 123 |
+
f"*Search Engine Summary*:\n{item['content'].strip()}\n"
|
| 124 |
+
f"*Query Result*:\n{item['qa'].strip()}"
|
| 125 |
)
|
| 126 |
else:
|
| 127 |
outputs.append(
|
| 128 |
f"## Next Result\n"
|
| 129 |
+
f"*Title*: {item['title'].strip()}\n"
|
| 130 |
+
f"*URL*: {item['url'].strip()}\n"
|
| 131 |
+
f"*Search Engine Summary*:\n{item['content'].strip()}"
|
| 132 |
)
|
| 133 |
|
| 134 |
return "\n\n".join(outputs[:SEARCH_ENGINE_RESULTS]).strip()
|