Rafael Uzarowski committed on
Commit
d76b7d0
·
unverified ·
1 Parent(s): e3ab7e8

fix: deps for document_query, formatting of knowledge_tool results

Browse files
docker/base/fs/ins/install_base_packages.sh CHANGED
@@ -4,7 +4,7 @@ set -e
4
  echo "====================BASE PACKAGES START===================="
5
 
6
  apt-get install -y --no-install-recommends \
7
- nodejs npm openssh-server sudo curl wget git ffmpeg supervisor cron
8
 
9
  echo "====================BASE PACKAGES NPM===================="
10
 
 
4
  echo "====================BASE PACKAGES START===================="
5
 
6
  apt-get install -y --no-install-recommends \
7
+ nodejs npm openssh-server sudo curl wget git ffmpeg supervisor cron tesseract-ocr-all poppler-utils
8
 
9
  echo "====================BASE PACKAGES NPM===================="
10
 
python/helpers/document_query.py CHANGED
@@ -592,8 +592,8 @@ class DocumentQueryHelper:
592
  mimetype = response.headers["content-type"]
593
  if "content-length" in response.headers:
594
  content_length = float(response.headers["content-length"]) / 1024 / 1024 # MB
595
- if content_length > 25.0:
596
- raise ValueError(f"Document content length exceeds max. 25MB: {content_length} MB ({document_uri})")
597
  if mimetype and '; charset=' in mimetype:
598
  mimetype = mimetype.split('; charset=')[0]
599
 
 
592
  mimetype = response.headers["content-type"]
593
  if "content-length" in response.headers:
594
  content_length = float(response.headers["content-length"]) / 1024 / 1024 # MB
595
+ if content_length > 50.0:
596
+ raise ValueError(f"Document content length exceeds max. 50MB: {content_length} MB ({document_uri})")
597
  if mimetype and '; charset=' in mimetype:
598
  mimetype = mimetype.split('; charset=')[0]
599
 
python/tools/knowledge_tool.py CHANGED
@@ -118,17 +118,17 @@ class Knowledge(Tool):
118
  if "qa" in item:
119
  outputs.append(
120
  f"## Next Result\n"
121
- f"Title: {item['title'].strip()}\n"
122
- f"URL: {item['url'].strip()}\n"
123
- f"Search Engine Summary: {item['content'].strip()}\n"
124
- f"Query Result: {item['qa'].strip()}"
125
  )
126
  else:
127
  outputs.append(
128
  f"## Next Result\n"
129
- f"Title: {item['title'].strip()}\n"
130
- f"URL: {item['url'].strip()}\n"
131
- f"Search Engine Summary: {item['content'].strip()}"
132
  )
133
 
134
  return "\n\n".join(outputs[:SEARCH_ENGINE_RESULTS]).strip()
 
118
  if "qa" in item:
119
  outputs.append(
120
  f"## Next Result\n"
121
+ f"*Title*: {item['title'].strip()}\n"
122
+ f"*URL*: {item['url'].strip()}\n"
123
+ f"*Search Engine Summary*:\n{item['content'].strip()}\n"
124
+ f"*Query Result*:\n{item['qa'].strip()}"
125
  )
126
  else:
127
  outputs.append(
128
  f"## Next Result\n"
129
+ f"*Title*: {item['title'].strip()}\n"
130
+ f"*URL*: {item['url'].strip()}\n"
131
+ f"*Search Engine Summary*:\n{item['content'].strip()}"
132
  )
133
 
134
  return "\n\n".join(outputs[:SEARCH_ENGINE_RESULTS]).strip()