LeetTools commited on
Commit
b377d2b
·
verified ·
1 Parent(s): 5263105

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/README.pdf filter=lfs diff=lfs merge=lfs -text
.github/workflows/update_space.yml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Run Python script
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+
8
+ jobs:
9
+ build:
10
+ runs-on: ubuntu-latest
11
+
12
+ steps:
13
+ - name: Checkout
14
+ uses: actions/checkout@v2
15
+
16
+ - name: Set up Python
17
+ uses: actions/setup-python@v2
18
+ with:
19
+ python-version: '3.9'
20
+
21
+ - name: Install Gradio
22
+ run: python -m pip install gradio
23
+
24
+ - name: Log in to Hugging Face
25
+ run: python -c 'import huggingface_hub; huggingface_hub.login(token="${{ secrets.hf_token }}")'
26
+
27
+ - name: Deploy to Spaces
28
+ run: gradio deploy
.gitignore ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
110
+ .pdm.toml
111
+ .pdm-python
112
+ .pdm-build/
113
+
114
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115
+ __pypackages__/
116
+
117
+ # Celery stuff
118
+ celerybeat-schedule
119
+ celerybeat.pid
120
+
121
+ # SageMath parsed files
122
+ *.sage.py
123
+
124
+ # Environments
125
+ .env
126
+ .venv
127
+ env/
128
+ venv/
129
+ ENV/
130
+ env.bak/
131
+ venv.bak/
132
+
133
+ # Spyder project settings
134
+ .spyderproject
135
+ .spyproject
136
+
137
+ # Rope project settings
138
+ .ropeproject
139
+
140
+ # mkdocs documentation
141
+ /site
142
+
143
+ # mypy
144
+ .mypy_cache/
145
+ .dmypy.json
146
+ dmypy.json
147
+
148
+ # Pyre type checker
149
+ .pyre/
150
+
151
+ # pytype static type analyzer
152
+ .pytype/
153
+
154
+ # Cython debug symbols
155
+ cython_debug/
156
+
157
+ # PyCharm
158
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
159
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
160
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
161
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
162
+ #.idea/
163
+
164
+ .gradio
165
+ .DS_Store
166
+ .env*
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2024 pengfeng
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,12 +1,278 @@
1
  ---
2
- title: Ask.py
3
- emoji: 📚
4
- colorFrom: blue
5
- colorTo: red
6
  sdk: gradio
7
- sdk_version: 5.34.0
8
- app_file: app.py
9
- pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: ask.py
3
+ app_file: ask.py
 
 
4
  sdk: gradio
5
+ sdk_version: 5.3.0
 
 
6
  ---
7
 
8
+ [![License](https://img.shields.io/github/license/pengfeng/ask.py)](LICENSE)
9
+
10
+ - [🚀 **Updates!** 🚀](#-updates-)
11
+ - [Introduction](#introduction)
12
+ - [Demo use cases](#demo-use-cases)
13
+ - [The search-extract-summarize flow](#the-search-extract-summarize-flow)
14
+ - [Quick start](#quick-start)
15
+ - [Use Different LLM Endpoints](#use-different-llm-endpoints)
16
+ - [Use local Ollama inference and embedding models](#use-local-ollama-inference-and-embedding-models)
17
+ - [Use DeepSeek API inference with OpenAI embedding models](#use-deepseek-api-inference-with-openai-embedding-models)
18
+ - [GradIO Deployment](#gradio-deployment)
19
+ - [Community](#community)
20
+
21
+
22
+ # 🚀 **Updates!** 🚀
23
+
24
+ A full version with db support and configurable components is open sourced here:
25
+ [LeetTools](https://github.com/leettools-dev/leettools). A demo web site has been setup
26
+ [here](https://svc.leettools.com). Please check them out!
27
+
28
+ We also added support for local Ollama inference and embedding models, as well as for other API
29
+ providers such as DeepSeek. Please see the [`Use Different LLM Endpoints`](#use-different-llm-endpoints) section for more details.
30
+
31
+ > [UPDATE]
32
+ > - 2025-01-20: add support for separate API endpoints for inference and embedding
33
+ > - 2025-01-20: add support for .env file switch and Ollama example
34
+ > - 2025-01-20: add support for default search proxy
35
+ > - 2024-12-20: add the full function version link
36
+ > - 2024-11-20: add Docling converter and local mode to query against local files
37
+ > - 2024-11-10: add Chonkie as the default chunker
38
+ > - 2024-10-28: add extract function as a new output mode
39
+ > - 2024-10-25: add hybrid search demo using DuckDB full-text search
40
+ > - 2024-10-22: add GradIO integration
41
+ > - 2024-10-21: use DuckDB for the vector search and use API for embedding
42
+ > - 2024-10-20: allow to specify a list of input urls
43
+ > - 2024-10-18: output-language and output-length parameters for LLM
44
+ > - 2024-10-18: date-restrict and target-site parameters for search
45
+
46
+ # Introduction
47
+
48
+ A single Python program to implement the search-extract-summarize flow, similar to AI search
49
+ engines such as Perplexity.
50
+
51
+ - You can run it with local Ollama inference and embedding models.
52
+ - You can run it on command line or with a GradIO UI.
53
+ - You can control the output behavior, e.g., extract structured data or change output language,
54
+ - You can control the search behavior, e.g., restrict to a specific site or date, or just scrape
55
+ a specified list of URLs.
56
+ - You can run it in a cron job or bash script to automate complex search/data extraction tasks.
57
+ - You can ask questions against local files.
58
+
59
+ We have a running UI example [in HuggingFace Spaces](https://huggingface.co/spaces/leettools/AskPy).
60
+
61
+ ![image](https://github.com/user-attachments/assets/0483e6a2-75d7-4fbd-813f-bfa13839c836)
62
+
63
+ ## Demo use cases
64
+
65
+ - [Search like Perplexity](demos/search_and_answer.md)
66
+ - [Only use the latest information from a specific site](demos/search_on_site_and_date.md)
67
+ - [Extract information from web search results](demos/search_and_extract.md)
68
+ - [Ask questions against local files](demos/local_files.md)
69
+ - [Use Ollama local LLM and Embedding models](demos/run_with_ollama.md)
70
+
71
+ > [!NOTE]
72
+ >
73
+ > - Our main goal is to illustrate the basic concepts of AI search engines with the raw constructs.
74
+ > Performance or scalability is not in the scope of this program.
75
+ > - We are planning to open source a real search-enabled AI toolset with real DB setup, real document
76
+ > pipeline, and real query engine soon. Star and watch this repo for updates!
77
+
78
+ ## The search-extract-summarize flow
79
+
80
+ Given a query, the program will
81
+
82
+ - in search mode: search Google for the top 10 web pages
83
+ - in local mode: use the local files under the 'data' directory
84
+ - crawl and scrape the result documents for their text content
85
+ - chunk the text content into chunks and save them into a vectordb
86
+ - perform a hybrid search (vector and BM25 FTS) with the query and find the top 10 matched chunks
87
+ - [Optional] use a reranker to re-rank the top chunks
88
+ - use the top chunks as the context to ask an LLM to generate the answer
89
+ - output the answer with the references
90
+
91
+ Of course this flow is a very simplified version of the real AI search engines, but it is a good
92
+ starting point to understand the basic concepts.
93
+
94
+ One benefit is that we can manipulate the search function and output format.
95
+
96
+ For example, we can:
97
+
98
+ - search with date-restrict to only retrieve the latest information.
99
+ - search within a target-site to only create the answer from the contents from it.
100
+ - ask LLM to use a specific language to answer the question.
101
+ - ask LLM to answer with a specific length.
102
+ - crawl a specific list of urls and answer based on those contents only.
103
+
104
+ This program can serve as a playground to understand and experiment with different components in
105
+ the pipeline.
106
+
107
+ # Quick start
108
+
109
+ ```bash
110
+ # recommend to use Python 3.10 or later and use venv or conda to create a virtual environment
111
+ % pip install -r requirements.txt
112
+
113
+ # modify .env file to set the API keys or export them as environment variables as below
114
+
115
+ # you can use the Google search API, if not set we provide a default search engine proxy for testing
116
+ # % export SEARCH_API_KEY="your-google-search-api-key"
117
+ # % export SEARCH_PROJECT_KEY="your-google-cx-key"
118
+
119
+ # right now we use OpenAI API, default using OpenAI
120
+ # % export LLM_BASE_URL=https://api.openai.com/v1
121
+ % export LLM_API_KEY=<your-openai-api-key>
122
+
123
+ # By default, the program will start a web UI. See GradIO Deployment section for more info.
124
+ # Run the program on command line with -c option
125
+ % python ask.py -c -q "What is an LLM agent?"
126
+
127
+ # You can also query your local files under the 'data' directory using the local mode
128
+ % python ask.py -i local -c -q "How does Ask.py work?"
129
+
130
+ # we can specify more parameters to control the behavior such as date_restrict and target_site
131
+ % python ask.py --help
132
+ Usage: ask.py [OPTIONS]
133
+
134
+ Search web for the query and summarize the results.
135
+
136
+ Options:
137
+ -q, --query TEXT Query to search
138
+ -i, --input-mode [search|local]
139
+ Input mode for the query, default is search.
140
+ When using local, files under 'data' folder
141
+ will be used as input.
142
+ -o, --output-mode [answer|extract]
143
+ Output mode for the answer, default is a
144
+ simple answer
145
+ -d, --date-restrict INTEGER Restrict search results to a specific date
146
+ range, default is no restriction
147
+ -s, --target-site TEXT Restrict search results to a specific site,
148
+ default is no restriction
149
+ --output-language TEXT Output language for the answer
150
+ --output-length INTEGER Output length for the answer
151
+ --url-list-file TEXT Instead of doing web search, scrape the
152
+ target URL list and answer the query based
153
+ on the content
154
+ --extract-schema-file TEXT Pydantic schema for the extract mode
155
+ --inference-model-name TEXT Model name to use for inference
156
+ --vector-search-only Do not use hybrid search mode, use vector
157
+ search only.
158
+ -c, --run-cli Run as a command line tool instead of
159
+ launching the Gradio UI
160
+ -e, --env TEXT The environment file to use, absolute path
161
+ or related to package root.
162
+ -l, --log-level [DEBUG|INFO|WARNING|ERROR]
163
+ Set the logging level [default: INFO]
164
+ --help Show this message and exit.
165
+ ```
166
+
167
+ # Use Different LLM Endpoints
168
+
169
+ ## Use local Ollama inference and embedding models
170
+ We can run Ask.py with different env files to use different LLM endpoints and other
171
+ related settings. For example, if you have a local Ollama serving instance, you can set
172
+ to use it as follows:
173
+
174
+ ```bash
175
+ # you may need to pull the models first
176
+ % ollama pull llama3.2
177
+ % ollama pull nomic-embed-text
178
+ % ollama serve
179
+
180
+ % cat > .env.ollama <<EOF
181
+ LLM_BASE_URL=http://localhost:11434/v1
182
+ LLM_API_KEY=dummy-key
183
+ DEFAULT_INFERENCE_MODEL=llama3.2
184
+ EMBEDDING_MODEL=nomic-embed-text
185
+ EMBEDDING_DIMENSIONS=768
186
+ EOF
187
+
188
+ # Then run the command with the -e option to specify the .env file to use
189
+ % python ask.py -e .env.ollama -c -q "How does Ollama work?"
190
+ ```
191
+
192
+ ## Use DeepSeek API inference with OpenAI embedding models
193
+
194
+ We can also use one provider for inference and another for embedding. For example, we can use
195
+ DeepSeek API for inference and OpenAI for embedding since DeepSeek does not provide an embedding
196
+ endpoint as of Jan 2025:
197
+
198
+ ```bash
199
+ % cat > .env.deepseek <<EOF
200
+ LLM_BASE_URL=https://api.deepseek.com/v1
201
+ LLM_API_KEY=<deepseek-api-key>
202
+ DEFAULT_INFERENCE_MODEL=deepseek-chat
203
+
204
+ EMBED_BASE_URL=https://api.openai.com/v1
205
+ EMBED_API_KEY=<openai-api-key>
206
+ EMBEDDING_MODEL=text-embedding-3-small
207
+ EMBEDDING_DIMENSIONS=1536
208
+ EOF
209
+
210
+ % python ask.py -e .env.deepseek -c -q "How does DeepSeek work?"
211
+ ```
212
+
213
+
214
+ # GradIO Deployment
215
+
216
+ > [!NOTE]
217
+ > Original GradIO app-sharing document [here](https://www.gradio.app/guides/sharing-your-app).
218
+
219
+ **Quick test and sharing**
220
+
221
+ By default, the program will start a web UI and share through GradIO.
222
+
223
+ ```bash
224
+ % python ask.py
225
+ * Running on local URL: http://127.0.0.1:7860
226
+ * Running on public URL: https://77c277af0330326587.gradio.live
227
+
228
+ # you can also specify SHARE_GRADIO_UI to only run locally
229
+ % export SHARE_GRADIO_UI=False
230
+ % python ask.py
231
+ * Running on local URL: http://127.0.0.1:7860
232
+ ```
233
+
234
+ **To share a more permanent link using HuggingFace Spaces**
235
+
236
+ - First, you need to [create a free HuggingFace account](https://huggingface.co/welcome).
237
+ - Then in your [settings/token page](https://huggingface.co/settings/tokens), create a new token with Write permissions.
238
+ - In your terminal, run the following commands in your app directory to deploy your program to
239
+ HuggingFace Spaces:
240
+
241
+ ```bash
242
+ % pip install gradio
243
+ % gradio deploy
244
+ Creating new Spaces Repo in '/home/you/ask.py'. Collecting metadata, press Enter to accept default value.
245
+ Enter Spaces app title [ask.py]: ask.py
246
+ Enter Gradio app file [ask.py]:
247
+ Enter Spaces hardware (cpu-basic, cpu-upgrade, t4-small, t4-medium, l4x1, l4x4, zero-a10g, a10g-small, a10g-large, a10g-largex2, a10g-largex4, a100-large, v5e-1x1, v5e-2x2, v5e-2x4) [cpu-basic]:
248
+ Any Spaces secrets (y/n) [n]: y
249
+ Enter secret name (leave blank to end): SEARCH_API_KEY
250
+ Enter secret value for SEARCH_API_KEY: YOUR_SEARCH_API_KEY
251
+ Enter secret name (leave blank to end): SEARCH_PROJECT_KEY
252
+ Enter secret value for SEARCH_PROJECT_KEY: YOUR_SEARCH_PROJECT_KEY
253
+ Enter secret name (leave blank to end): LLM_API_KEY
254
+ Enter secret value for LLM_API_KEY: YOUR_LLM_API_KEY
255
+ Enter secret name (leave blank to end):
256
+ Create Github Action to automatically update Space on 'git push'? [n]: n
257
+ Space available at https://huggingface.co/spaces/your_user_name/ask.py
258
+ ```
259
+
260
+ Now you can use the HuggingFace space app to run your queries.
261
+
262
+
263
+ # Community
264
+
265
+ **License and Acknowledgment**
266
+
267
+ The source code is licensed under MIT license. Thanks for these amazing open-source projects and API
268
+ providers:
269
+
270
+ - [Google Search API](https://developers.google.com/custom-search/v1/overview)
271
+ - [OpenAI API](https://beta.openai.com/docs/api-reference/completions/create)
272
+ - [Jinja2](https://jinja.palletsprojects.com/en/3.0.x/)
273
+ - [bs4](https://www.crummy.com/software/BeautifulSoup/bs4/doc/)
274
+ - [DuckDB](https://github.com/duckdb/duckdb)
275
+ - [Docling](https://github.com/DS4SD/docling)
276
+ - [GradIO](https://github.com/gradio-app/gradio)
277
+ - [Chonkie](https://github.com/bhavnicksm/chonkie)
278
+
ask.py ADDED
@@ -0,0 +1,1156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import csv
2
+ import io
3
+ import json
4
+ import logging
5
+ import os
6
+ import queue
7
+ import urllib.parse
8
+ from concurrent.futures import ThreadPoolExecutor
9
+ from datetime import datetime
10
+ from enum import Enum
11
+ from functools import partial
12
+ from queue import Queue
13
+ from typing import Any, Dict, Generator, List, Optional, Tuple, TypeVar
14
+
15
+ import click
16
+ import gradio as gr
17
+ import requests
18
+ from bs4 import BeautifulSoup
19
+ from chonkie import Chunk
20
+ from dotenv import load_dotenv
21
+ from jinja2 import BaseLoader, Environment
22
+ from openai import OpenAI
23
+ from pydantic import BaseModel, create_model
24
+
25
# Type variable bound to pydantic BaseModel; presumably used with
# `create_model` (imported above) for dynamically built extraction
# schemas — confirm against the later extract-mode code.
TypeVar_BaseModel = TypeVar("TypeVar_BaseModel", bound=BaseModel)

# Directory containing this script, used to resolve the default .env file
# relative to the package root rather than the current working directory.
script_dir = os.path.dirname(os.path.abspath(__file__))
default_env_file = os.path.abspath(os.path.join(script_dir, ".env"))
29
+
30
+
31
class OutputMode(str, Enum):
    """How query results are presented: a prose answer or structured extraction."""

    answer = "answer"
    extract = "extract"
34
+
35
+
36
class InputMode(str, Enum):
    """Where source documents come from: a web search or local files."""

    search = "search"
    local = "local"
39
+
40
+
41
class AskSettings(BaseModel):
    """Per-query settings collected from the CLI options / UI inputs."""

    # 0 disables the date restriction; positive values are forwarded to the
    # search API as `dateRestrict` (see search_web).
    date_restrict: int
    # Empty string means no site restriction; otherwise passed as `siteSearch`.
    target_site: str
    output_language: str
    output_length: int
    # Explicit URLs to scrape instead of performing a web search.
    url_list: List[str]
    inference_model_name: str
    # True => vector + full-text hybrid search; False => vector search only.
    hybrid_search: bool
    input_mode: InputMode
    output_mode: OutputMode
    # Source text of the pydantic schema for extract mode; empty if unused.
    extract_schema_str: str
52
+
53
+
54
+ def _get_logger(log_level: str) -> logging.Logger:
55
+ logger = logging.getLogger(__name__)
56
+ logger.setLevel(log_level)
57
+ if len(logger.handlers) > 0:
58
+ return logger
59
+
60
+ handler = logging.StreamHandler()
61
+ formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
62
+ handler.setFormatter(formatter)
63
+ logger.addHandler(handler)
64
+ return logger
65
+
66
+
67
+ def _read_url_list(url_list_file: str) -> List[str]:
68
+ if not url_list_file:
69
+ return []
70
+
71
+ with open(url_list_file, "r") as f:
72
+ links = f.readlines()
73
+ url_list = [
74
+ link.strip()
75
+ for link in links
76
+ if link.strip() != "" and not link.startswith("#")
77
+ ]
78
+ return url_list
79
+
80
+
81
+ def _read_extract_schema_str(extract_schema_file: str) -> str:
82
+ if not extract_schema_file:
83
+ return ""
84
+
85
+ with open(extract_schema_file, "r") as f:
86
+ schema_str = f.read()
87
+ return schema_str
88
+
89
+
90
+ def _output_csv(result_dict: Dict[str, List[BaseModel]], key_name: str) -> str:
91
+ # generate the CSV content from a Dict of URL and list of extracted items
92
+ output = io.StringIO()
93
+ csv_writer = None
94
+ for src_url, items in result_dict.items():
95
+ for item in items:
96
+ value_dict = item.model_dump()
97
+ item_with_url = {**value_dict, key_name: src_url}
98
+
99
+ if csv_writer is None:
100
+ headers = list(value_dict.keys()) + [key_name]
101
+ csv_writer = csv.DictWriter(output, fieldnames=headers)
102
+ csv_writer.writeheader()
103
+
104
+ csv_writer.writerow(item_with_url)
105
+
106
+ csv_content = output.getvalue()
107
+ output.close()
108
+ return csv_content
109
+
110
+
111
+ class Ask:
112
+
113
    def __init__(self, logger: Optional[logging.Logger] = None):
        """Set up the Ask pipeline: env config, converter, chunker, DB, HTTP session.

        Args:
            logger: optional logger; a default INFO-level module logger is
                created when none is given.
        """
        if logger is not None:
            self.logger = logger
        else:
            self.logger = _get_logger("INFO")

        # Read API endpoints/keys from environment variables; raises when
        # required settings are missing.
        self.read_env_variables()

        # Eagerly initialize the heavy components: Docling converter,
        # Chonkie chunker, and an in-memory DuckDB instance.
        self.init_converter()
        self.init_chunker()
        self.init_db()

        # Shared HTTP session with a desktop-browser User-Agent to reduce
        # the chance of scrapes being blocked.
        self.session = requests.Session()
        user_agent: str = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0"
        )
        self.session.headers.update({"User-Agent": user_agent})
132
+
133
+ def read_env_variables(self) -> None:
134
+ err_msg = ""
135
+
136
+ self.search_api_url = os.environ.get("SEARCH_API_URL")
137
+ if self.search_api_url is None:
138
+ self.search_api_key = os.environ.get("SEARCH_API_KEY")
139
+ if self.search_api_key:
140
+ self.search_api_url = "https://www.googleapis.com/customsearch/v1"
141
+ self.search_project_id = os.environ.get("SEARCH_PROJECT_KEY")
142
+ if self.search_project_id is None:
143
+ err_msg += "SEARCH_PROJECT_KEY env variable not set while SEARCH_API_KEY is set.\n"
144
+ else:
145
+ self.logger.info("No SEARCH_API_URL or SEARCH_API_KEYenv variable set.")
146
+ self.logger.info(
147
+ "Using the default proxy at https://svc.leettools.com:8098"
148
+ )
149
+ self.search_api_url = "https://svc.leettools.com:8098/customsearch/v1"
150
+ self.search_api_key = "dummy-search-api-key"
151
+ self.search_project_id = "dummy-search-project-id"
152
+ else:
153
+ self.search_api_key = os.environ.get("SEARCH_API_KEY")
154
+ if self.search_api_key is None:
155
+ err_msg += (
156
+ f"SEARCH_API_KEY env variable not set for {self.search_api_url}.\n"
157
+ )
158
+ self.search_project_id = os.environ.get("SEARCH_PROJECT_KEY")
159
+ if self.search_project_id is None:
160
+ err_msg += f"SEARCH_PROJECT_KEY env variable not set for {self.search_api_url}.\n"
161
+
162
+ self.llm_base_url = os.environ.get("LLM_BASE_URL")
163
+ if self.llm_base_url is None:
164
+ self.llm_base_url = "https://api.openai.com/v1"
165
+
166
+ self.llm_api_key = os.environ.get("LLM_API_KEY")
167
+ if self.llm_api_key is None:
168
+ err_msg += "LLM_API_KEY env variable not set.\n"
169
+
170
+ self.default_inference_model = os.environ.get("DEFAULT_INFERENCE_MODEL")
171
+ if self.default_inference_model is None:
172
+ self.default_inference_model = "gpt-4o-mini"
173
+
174
+ self.embed_base_url = os.environ.get("EMBED_BASE_URL")
175
+ if self.embed_base_url is None:
176
+ self.embed_base_url = self.llm_base_url
177
+
178
+ self.embed_api_key = os.environ.get("EMBED_API_KEY")
179
+ if self.embed_api_key is None:
180
+ if self.embed_base_url == self.llm_base_url:
181
+ self.embed_api_key = self.llm_api_key
182
+ else:
183
+ err_msg += (
184
+ f"EMBED_API_KEY env variable not set for {self.embed_base_url}.\n"
185
+ )
186
+
187
+ self.embedding_model = os.environ.get("EMBEDDING_MODEL")
188
+ self.embedding_dimensions = os.environ.get("EMBEDDING_DIMENSIONS")
189
+
190
+ if self.embedding_model is None or self.embedding_dimensions is None:
191
+ self.embedding_model = "text-embedding-3-small"
192
+ self.embedding_dimensions = 1536
193
+
194
+ if err_msg != "":
195
+ raise Exception(f"\n{err_msg}\n")
196
+
197
    def init_converter(self) -> None:
        """Initialize the Docling document converter.

        The import is deferred to call time so that module import stays
        cheap when the converter is not needed.
        """
        from docling.document_converter import DocumentConverter

        self.logger.info("Initializing converter ...")
        self.converter = DocumentConverter()
        self.logger.info("✅ Successfully initialized Docling.")
203
+
204
    def init_chunker(self) -> None:
        """Initialize the Chonkie token chunker (deferred import).

        Chunk size 1000 tokens with 100-token overlap between chunks.
        """
        from chonkie import TokenChunker

        self.logger.info("Initializing chunker ...")
        self.chunker = TokenChunker(chunk_size=1000, chunk_overlap=100)
        self.logger.info("✅ Successfully initialized Chonkie.")
210
+
211
    def init_db(self) -> None:
        """Create an in-memory DuckDB connection for the vector store.

        Loads the `vss` (vector similarity search) and `fts` (BM25
        full-text search) extensions used by the hybrid search, and a
        document-id sequence starting at 1000.
        """
        import duckdb

        self.logger.info("Initializing database ...")
        self.db_con = duckdb.connect(":memory:")
        self.db_con.install_extension("vss")
        self.db_con.load_extension("vss")
        self.db_con.install_extension("fts")
        self.db_con.load_extension("fts")
        self.db_con.sql("CREATE SEQUENCE seq_docid START 1000")
        self.logger.info("✅ Successfully initialized DuckDB.")
222
+
223
    def convert_file_to_md(self, file_path: str) -> str:
        """Convert a local document to Markdown text via Docling."""
        result = self.converter.convert(file_path)
        return result.document.export_to_markdown()
226
+
227
+ def search_web(self, query: str, settings: AskSettings) -> List[str]:
228
+ escaped_query = urllib.parse.quote(query)
229
+ url_base = (
230
+ f"{self.search_api_url}?key={self.search_api_key}"
231
+ f"&cx={self.search_project_id}&q={escaped_query}"
232
+ )
233
+ url_paras = f"&safe=active"
234
+ if settings.date_restrict > 0:
235
+ url_paras += f"&dateRestrict={settings.date_restrict}"
236
+ if settings.target_site:
237
+ url_paras += f"&siteSearch={settings.target_site}&siteSearchFilter=i"
238
+ url = f"{url_base}{url_paras}"
239
+
240
+ self.logger.debug(f"Searching for query: {query}")
241
+
242
+ resp = requests.get(url)
243
+
244
+ if resp is None:
245
+ raise Exception("No response from search API")
246
+
247
+ search_results_dict = json.loads(resp.text)
248
+ if "error" in search_results_dict:
249
+ raise Exception(
250
+ f"Error in search API response: {search_results_dict['error']}"
251
+ )
252
+
253
+ if "searchInformation" not in search_results_dict:
254
+ raise Exception(
255
+ f"No search information in search API response: {resp.text}"
256
+ )
257
+
258
+ total_results = search_results_dict["searchInformation"].get("totalResults", 0)
259
+ if total_results == 0:
260
+ self.logger.warning(f"No results found for query: {query}")
261
+ return []
262
+
263
+ results = search_results_dict.get("items", [])
264
+ if results is None or len(results) == 0:
265
+ self.logger.warning(f"No result items in the response for query: {query}")
266
+ return []
267
+
268
+ found_links = []
269
+ for result in results:
270
+ link = result.get("link", None)
271
+ if link is None or link == "":
272
+ self.logger.warning(f"Search result link missing: {result}")
273
+ continue
274
+ found_links.append(link)
275
+ return found_links
276
+
277
+ def _scape_url(self, url: str) -> Tuple[str, str]:
278
+ self.logger.info(f"Scraping {url} ...")
279
+ try:
280
+ response = self.session.get(url, timeout=10)
281
+ soup = BeautifulSoup(response.content, "lxml", from_encoding="utf-8")
282
+
283
+ body_tag = soup.body
284
+ if body_tag:
285
+ body_text = body_tag.get_text()
286
+ body_text = " ".join(body_text.split()).strip()
287
+ self.logger.debug(f"Scraped {url}: {body_text}...")
288
+ if len(body_text) > 100:
289
+ self.logger.info(
290
+ f"✅ Successfully scraped {url} with length: {len(body_text)}"
291
+ )
292
+ return url, body_text
293
+ else:
294
+ self.logger.warning(
295
+ f"Body text too short for url: {url}, length: {len(body_text)}"
296
+ )
297
+ return url, ""
298
+ else:
299
+ self.logger.warning(f"No body tag found in the response for url: {url}")
300
+ return url, ""
301
+ except Exception as e:
302
+ self.logger.error(f"Scraping error {url}: {e}")
303
+ return url, ""
304
+
305
+ def scrape_urls(self, urls: List[str]) -> Dict[str, str]:
306
+ # the key is the url and the value is the body text
307
+ scrape_results: Dict[str, str] = {}
308
+
309
+ partial_scrape = partial(self._scape_url)
310
+ with ThreadPoolExecutor(max_workers=10) as executor:
311
+ results = executor.map(partial_scrape, urls)
312
+
313
+ for url, body_text in results:
314
+ if body_text != "":
315
+ scrape_results[url] = body_text
316
+
317
+ return scrape_results
318
+
319
+ def chunk_results(self, scrape_results: Dict[str, str]) -> Dict[str, List[Chunk]]:
320
+ chunking_results: Dict[str, List[str]] = {}
321
+ for url, text in scrape_results.items():
322
+ chunking_results[url] = self.chunker.chunk(text)
323
+ return chunking_results
324
+
325
+ def get_embedding(self, client: OpenAI, texts: List[str]) -> List[List[float]]:
326
+ if len(texts) == 0:
327
+ return []
328
+
329
+ response = client.embeddings.create(input=texts, model=self.embedding_model)
330
+ embeddings = []
331
+ for i in range(len(response.data)):
332
+ embeddings.append(response.data[i].embedding)
333
+ return embeddings
334
+
335
+ def batch_get_embedding(
336
+ self, client: OpenAI, chunk_batch: Tuple[str, List[str]]
337
+ ) -> Tuple[Tuple[str, List[str]], List[List[float]]]:
338
+ """
339
+ Return the chunk_batch as well as the embeddings for each chunk so that
340
+ we can aggregate them and save them to the database together.
341
+
342
+ Args:
343
+ - client: OpenAI client
344
+ - chunk_batch: Tuple of URL and list of chunks scraped from the URL
345
+
346
+ Returns:
347
+ - Tuple of chunk_bach and list of result embeddings
348
+ """
349
+ texts = chunk_batch[1]
350
+ embeddings = self.get_embedding(client, texts)
351
+ return chunk_batch, embeddings
352
+
353
+ def _create_table(self) -> str:
354
+ # Simple ways to get a unique table name
355
+ timestamp = datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")
356
+ table_name = f"document_chunks_{timestamp}"
357
+
358
+ self.db_con.execute(
359
+ f"""
360
+ CREATE TABLE {table_name} (
361
+ doc_id INTEGER PRIMARY KEY DEFAULT nextval('seq_docid'),
362
+ url TEXT,
363
+ chunk TEXT,
364
+ vec FLOAT[{self.embedding_dimensions}]
365
+ );
366
+ """
367
+ )
368
+ return table_name
369
+
370
+ def save_chunks_to_db(self, all_chunks: Dict[str, List[Chunk]]) -> str:
371
+ """
372
+ The key of chunking_results is the URL and the value is the list of chunks.
373
+ """
374
+ embed_client = self._get_embed_api_client()
375
+ embed_batch_size = 50
376
+ query_batch_size = 100
377
+ insert_data = []
378
+
379
+ table_name = self._create_table()
380
+
381
+ batches: List[Tuple[str, List[str]]] = []
382
+ for url, list_chunks in all_chunks.items():
383
+ for i in range(0, len(list_chunks), embed_batch_size):
384
+ batch = [chunk.text for chunk in list_chunks[i : i + embed_batch_size]]
385
+ batches.append((url, batch))
386
+
387
+ self.logger.info(f"Embedding {len(batches)} batches of chunks ...")
388
+ partial_get_embedding = partial(self.batch_get_embedding, embed_client)
389
+ with ThreadPoolExecutor(max_workers=10) as executor:
390
+ all_embeddings = executor.map(partial_get_embedding, batches)
391
+ self.logger.info(f"✅ Finished embedding.")
392
+
393
+ # We batch the insert data to speed up the insertion operation.
394
+ # Although the DuckDB doc says executeMany is optimized for batch insert,
395
+ # we found that it is faster to batch the insert data and run a single insert.
396
+ for chunk_batch, embeddings in all_embeddings:
397
+ url = chunk_batch[0]
398
+ list_chunks = chunk_batch[1]
399
+ insert_data.extend(
400
+ [
401
+ (url.replace("'", " "), chunk.replace("'", " "), embedding)
402
+ for chunk, embedding in zip(list_chunks, embeddings)
403
+ ]
404
+ )
405
+
406
+ for i in range(0, len(insert_data), query_batch_size):
407
+ value_str = ", ".join(
408
+ [
409
+ f"('{url}', '{chunk}', {embedding})"
410
+ for url, chunk, embedding in insert_data[i : i + embed_batch_size]
411
+ ]
412
+ )
413
+ query = f"""
414
+ INSERT INTO {table_name} (url, chunk, vec) VALUES {value_str};
415
+ """
416
+ self.db_con.execute(query)
417
+
418
+ self.db_con.execute(
419
+ f"""
420
+ CREATE INDEX {table_name}_cos_idx ON {table_name} USING HNSW (vec)
421
+ WITH (metric = 'cosine');
422
+ """
423
+ )
424
+ self.logger.info(f"✅ Created the vector index ...")
425
+ self.db_con.execute(
426
+ f"""
427
+ PRAGMA create_fts_index(
428
+ {table_name}, 'doc_id', 'chunk'
429
+ );
430
+ """
431
+ )
432
+ self.logger.info(f"✅ Created the full text search index ...")
433
+ return table_name
434
+
435
+ def vector_search(
436
+ self, table_name: str, query: str, settings: AskSettings
437
+ ) -> List[Dict[str, Any]]:
438
+ import duckdb
439
+
440
+ """
441
+ The return value is a list of {url: str, chunk: str} records.
442
+ In a real world, we will define a class of Chunk to have more metadata such as offsets.
443
+ """
444
+ embed_client = self._get_embed_api_client()
445
+ embeddings = self.get_embedding(embed_client, [query])[0]
446
+
447
+ query_result: duckdb.DuckDBPyRelation = self.db_con.sql(
448
+ f"""
449
+ SELECT * FROM {table_name}
450
+ ORDER BY array_distance(vec, {embeddings}::FLOAT[{self.embedding_dimensions}])
451
+ LIMIT 10;
452
+ """
453
+ )
454
+
455
+ self.logger.debug(query_result)
456
+
457
+ # use a dict to remove duplicates from vector search and full-text search
458
+ matched_chunks_dict = {}
459
+ for vec_result in query_result.fetchall():
460
+ doc_id = vec_result[0]
461
+ result_record = {
462
+ "url": vec_result[1],
463
+ "chunk": vec_result[2],
464
+ }
465
+ matched_chunks_dict[doc_id] = result_record
466
+
467
+ if settings.hybrid_search:
468
+ self.logger.info("Running full-text search ...")
469
+
470
+ self.db_con.execute(
471
+ f"""
472
+ PREPARE fts_query AS (
473
+ WITH scored_docs AS (
474
+ SELECT *, fts_main_{table_name}.match_bm25(
475
+ doc_id, ?, fields := 'chunk'
476
+ ) AS score FROM {table_name})
477
+ SELECT doc_id, url, chunk, score
478
+ FROM scored_docs
479
+ WHERE score IS NOT NULL
480
+ ORDER BY score DESC
481
+ LIMIT 10)
482
+ """
483
+ )
484
+ self.db_con.execute("PRAGMA threads=4")
485
+
486
+ # You can run more complex query rewrite methods here
487
+ # usually: stemming, stop words, etc.
488
+ escaped_query = query.replace("'", " ")
489
+ fts_result: duckdb.DuckDBPyRelation = self.db_con.execute(
490
+ f"EXECUTE fts_query('{escaped_query}')"
491
+ )
492
+
493
+ index = 0
494
+ for fts_record in fts_result.fetchall():
495
+ index += 1
496
+ self.logger.debug(f"The full text search record #{index}: {fts_record}")
497
+ doc_id = fts_record[0]
498
+ result_record = {
499
+ "url": fts_record[1],
500
+ "chunk": fts_record[2],
501
+ }
502
+
503
+ # You can configure the score threashold and top-k
504
+ if fts_record[3] > 1:
505
+ matched_chunks_dict[doc_id] = result_record
506
+ else:
507
+ break
508
+
509
+ if index >= 10:
510
+ break
511
+
512
+ return matched_chunks_dict.values()
513
+
514
+ def _get_inference_api_client(self) -> OpenAI:
515
+ return OpenAI(api_key=self.llm_api_key, base_url=self.llm_base_url)
516
+
517
+ def _get_embed_api_client(self) -> OpenAI:
518
+ return OpenAI(api_key=self.embed_api_key, base_url=self.embed_base_url)
519
+
520
+ def _render_template(self, template_str: str, variables: Dict[str, Any]) -> str:
521
+ env = Environment(loader=BaseLoader(), autoescape=False)
522
+ template = env.from_string(template_str)
523
+ return template.render(variables)
524
+
525
+ def _get_target_class(self, extract_schema_str: str) -> TypeVar_BaseModel:
526
+ local_namespace = {"BaseModel": BaseModel}
527
+ exec(extract_schema_str, local_namespace, local_namespace)
528
+ for key, value in local_namespace.items():
529
+ if key == "__builtins__":
530
+ continue
531
+ if key == "BaseModel":
532
+ continue
533
+ if isinstance(value, type):
534
+ if issubclass(value, BaseModel):
535
+ return value
536
+ raise Exception("No Pydantic schema found in the extract schema str.")
537
+
538
    def run_inference(
        self,
        query: str,
        matched_chunks: List[Dict[str, Any]],
        settings: AskSettings,
    ) -> str:
        """
        Answer *query* with an LLM using the matched chunks as context.

        Args:
            - query: the user's question
            - matched_chunks: list of {url, chunk} records used as references
            - settings: controls output language/length and the model to use

        Returns:
            - The LLM's answer text (with inline [n] reference markers).

        Raises:
            - Exception when the chat completion API returns no completion.
        """
        system_prompt = (
            "You are an expert summarizing the answers based on the provided contents."
        )
        user_promt_template = """
        Given the context as a sequence of references with a reference id in the
        format of a leading [x], please answer the following question using {{ language }}:

        {{ query }}

        In the answer, use format [1], [2], ..., [n] in line where the reference is used.
        For example, "According to the research from Google[3], ...".

        Please create the answer strictly related to the context. If the context has no
        information about the query, please write "No related information found in the context."
        using {{ language }}.

        {{ length_instructions }}

        Here is the context:
        {{ context }}
        """
        # Number the chunks [1]..[n]; the same ordering is used by the caller
        # when rendering the reference list.
        context = ""
        for i, chunk in enumerate(matched_chunks):
            context += f"[{i+1}] {chunk['chunk']}\n"

        # output_length == 0 (or None) means "let the model decide".
        if not settings.output_length:
            length_instructions = ""
        else:
            length_instructions = (
                f"Please provide the answer in { settings.output_length } words."
            )

        user_prompt = self._render_template(
            user_promt_template,
            {
                "query": query,
                "context": context,
                "language": settings.output_language,
                "length_instructions": length_instructions,
            },
        )

        # Fall back to the instance-level default model when none was chosen.
        final_inference_model = settings.inference_model_name
        if settings.inference_model_name is None:
            final_inference_model = self.default_inference_model

        self.logger.debug(f"Running inference with model: {final_inference_model}")
        self.logger.debug(f"Final user prompt: {user_prompt}")

        api_client = self._get_inference_api_client()
        completion = api_client.chat.completions.create(
            model=final_inference_model,
            messages=[
                {
                    "role": "system",
                    "content": system_prompt,
                },
                {
                    "role": "user",
                    "content": user_prompt,
                },
            ],
        )
        if completion is None:
            raise Exception("No completion from the API")

        response_str = completion.choices[0].message.content
        return response_str
612
+
613
    def run_extract(
        self,
        query: str,
        extract_schema_str: str,
        target_content: str,
        settings: AskSettings,
    ) -> List[TypeVar_BaseModel]:
        """
        Extract structured data items from *target_content* via the LLM.

        Args:
            - query: topic the extraction should focus on
            - extract_schema_str: Python source defining a Pydantic model
            - target_content: the document text to extract from
            - settings: provides the inference model name

        Returns:
            - List of instances of the user-defined Pydantic model.

        Raises:
            - Exception when the API returns no completion or refuses.
        """
        target_class = self._get_target_class(extract_schema_str)
        system_prompt = (
            "You are an expert of extract structual information from the document."
        )
        user_promt_template = """
        Given the provided content, if it contains information about {{ query }}, please extract the
        list of structured data items as defined in the following Pydantic schema:

        {{ extract_schema_str }}

        Below is the provided content:
        {{ content }}
        """
        user_prompt = self._render_template(
            user_promt_template,
            {
                "query": query,
                "content": target_content,
                "extract_schema_str": extract_schema_str,
            },
        )

        self.logger.debug(
            f"Running extraction with model: {settings.inference_model_name}"
        )
        self.logger.debug(f"Final user prompt: {user_prompt}")

        # Wrap the user's model in a list container so the structured-output
        # API can return multiple extracted items in one response.
        class_name = target_class.__name__
        list_class_name = f"{class_name}_list"
        response_pydantic_model = create_model(
            list_class_name,
            items=(List[target_class], ...),
        )

        api_client = self._get_inference_api_client()
        completion = api_client.beta.chat.completions.parse(
            model=settings.inference_model_name,
            messages=[
                {
                    "role": "system",
                    "content": system_prompt,
                },
                {
                    "role": "user",
                    "content": user_prompt,
                },
            ],
            response_format=response_pydantic_model,
        )
        if completion is None:
            raise Exception("No completion from the API")

        message = completion.choices[0].message
        if message.refusal:
            raise Exception(
                f"Refused to extract information from the document: {message.refusal}."
            )

        extract_result = message.parsed
        return extract_result.items
680
+
681
    def run_query_gradio(
        self,
        query: str,
        date_restrict: int,
        target_site: str,
        output_language: str,
        output_length: int,
        url_list_str: str,
        inference_model_name: str,
        hybrid_search: bool,
        input_mode_str: str,
        output_mode_str: str,
        extract_schema_str: str,
    ) -> Generator[Tuple[str, str], None, Tuple[str, str]]:
        """
        Run the full search/scrape/chunk/answer (or extract) flow as a generator.

        Each yielded value is a (result, logs) tuple so a Gradio UI can stream
        progress; intermediate yields carry an empty result and accumulated
        log text, and the final yield carries the finished answer/CSV.
        """
        logger = self.logger
        log_queue = Queue()

        if url_list_str:
            url_list = url_list_str.split("\n")
        else:
            url_list = []

        settings = AskSettings(
            date_restrict=date_restrict,
            target_site=target_site,
            output_language=output_language,
            output_length=output_length,
            url_list=url_list,
            inference_model_name=inference_model_name,
            hybrid_search=hybrid_search,
            input_mode=InputMode(input_mode_str),
            output_mode=OutputMode(output_mode_str),
            extract_schema_str=extract_schema_str,
        )

        # add a queue handler to the logger to capture the logs
        # (emit is monkey-patched to push formatted records onto log_queue)
        queue_handler = logging.Handler()
        formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
        queue_handler.emit = lambda record: log_queue.put(formatter.format(record))
        logger.addHandler(queue_handler)

        def update_logs():
            # Drain everything currently queued and join into one string.
            logs = []
            while True:
                try:
                    log = log_queue.get_nowait()
                    logs.append(log)
                except queue.Empty:
                    break
            return "\n".join(logs)

        # wrap the process in a generator to yield the logs to integrate with GradIO
        def process_with_logs():
            # the key is the URI and the result is the scraped text
            target_documents: Dict[str, str] = {}

            if settings.input_mode == InputMode.search:
                # An explicit URL list bypasses the web search entirely.
                if len(settings.url_list) > 0:
                    links = settings.url_list
                else:
                    logger.info("Searching the web ...")
                    yield "", update_logs()
                    links = self.search_web(query, settings)
                    logger.info(f"✅ Found {len(links)} links for query: {query}")
                    for i, link in enumerate(links):
                        logger.debug(f"{i+1}. {link}")
                    yield "", update_logs()

                logger.info("Scraping the URLs ...")
                yield "", update_logs()
                target_documents = self.scrape_urls(links)
                logger.info(f"✅ Scraped {len(target_documents)} URLs.")
                yield "", update_logs()
            elif settings.input_mode == InputMode.local:
                logger.info("Processing the local data directory ...")
                yield "", update_logs()
                # read the files from the data folder
                data_folder = os.path.join(script_dir, "data")
                if not os.path.exists(data_folder):
                    raise Exception("Data folder not found.")
                for file_name in os.listdir(data_folder):
                    logger.info(f"Processing {file_name} ...")
                    yield "", update_logs()
                    file_path = os.path.join(data_folder, file_name)
                    file_uri = f"file://{file_path}"
                    target_documents[file_uri] = self.convert_file_to_md(file_path)
                    logger.info(f"✅ Finished processing {file_name}.")
                    yield "", update_logs()
            else:
                raise Exception(f"Invalid input mode: {settings.input_mode}")

            if settings.output_mode == OutputMode.answer:
                # answer mode: chunk -> embed/save -> retrieve -> summarize
                logger.info("Chunking the text ...")
                yield "", update_logs()
                all_chunks = self.chunk_results(target_documents)
                chunk_count = 0
                for url, chunks in all_chunks.items():
                    logger.debug(f"URL: {url}")
                    chunk_count += len(chunks)
                    for i, chunk in enumerate(chunks):
                        logger.debug(f"Chunk {i+1}: {chunk.text}")
                logger.info(f"✅ Generated {chunk_count} chunks ...")
                yield "", update_logs()

                logger.info(f"Saving {chunk_count} chunks to DB ...")
                yield "", update_logs()
                table_name = self.save_chunks_to_db(all_chunks)
                logger.info(f"✅ Successfully embedded and saved chunks to DB.")
                yield "", update_logs()

                logger.info("Querying the vector DB to get context ...")
                matched_chunks = self.vector_search(table_name, query, settings)
                for i, result in enumerate(matched_chunks):
                    logger.debug(f"{i+1}. {result}")
                logger.info(f"✅ Got {len(matched_chunks)} matched chunks.")
                yield "", update_logs()

                logger.info("Running inference with context ...")
                yield "", update_logs()
                answer = self.run_inference(
                    query=query,
                    matched_chunks=matched_chunks,
                    settings=settings,
                )
                logger.info("✅ Finished inference API call.")
                logger.info("Generating output ...")
                yield "", update_logs()

                answer = f"# Answer\n\n{answer}\n"
                references = "\n".join(
                    [
                        f"[{i+1}] {result['url']}"
                        for i, result in enumerate(matched_chunks)
                    ]
                )
                yield f"{answer}\n\n# References\n\n{references}", update_logs()
            elif settings.output_mode == OutputMode.extract:
                # extract mode: run structured extraction per document, then
                # aggregate everything into a CSV keyed by source URL.
                logger.info("Extracting structured data ...")
                yield "", update_logs()

                aggregated_output = {}
                for url, text in target_documents.items():
                    items = self.run_extract(
                        query=query,
                        extract_schema_str=extract_schema_str,
                        target_content=text,
                        settings=settings,
                    )
                    self.logger.info(
                        f"✅ Finished inference API call. Extracted {len(items)} items from {url}."
                    )
                    yield "", update_logs()

                    self.logger.debug(items)
                    aggregated_output[url] = items

                logger.info("✅ Finished extraction from all urls.")
                logger.info("Generating output ...")
                yield "", update_logs()
                answer = _output_csv(aggregated_output, "SourceURL")
                yield f"{answer}", update_logs()
            else:
                raise Exception(f"Invalid output mode: {settings.output_mode}")

        logs = ""
        final_result = ""

        try:
            for result, log_update in process_with_logs():
                logs += log_update + "\n"
                final_result = result
                yield final_result, logs
        finally:
            # Always detach the queue handler so repeated runs do not stack
            # handlers on the shared logger.
            logger.removeHandler(queue_handler)

        return final_result, logs
857
+
858
+ def run_query(
859
+ self,
860
+ query: str,
861
+ settings: AskSettings,
862
+ ) -> str:
863
+ url_list_str = "\n".join(settings.url_list)
864
+
865
+ for result, logs in self.run_query_gradio(
866
+ query=query,
867
+ date_restrict=settings.date_restrict,
868
+ target_site=settings.target_site,
869
+ output_language=settings.output_language,
870
+ output_length=settings.output_length,
871
+ url_list_str=url_list_str,
872
+ inference_model_name=settings.inference_model_name,
873
+ hybrid_search=settings.hybrid_search,
874
+ input_mode_str=settings.input_mode,
875
+ output_mode_str=settings.output_mode,
876
+ extract_schema_str=settings.extract_schema_str,
877
+ ):
878
+ final_result = result
879
+ return final_result
880
+
881
+
882
def launch_gradio(
    query: str,
    init_settings: AskSettings,
    share_ui: bool,
    logger: logging.Logger,
) -> None:
    """
    Build and launch the Gradio UI for the search-extract-summarize flow.

    Args:
        - query: initial value for the query textbox
        - init_settings: defaults for every UI control
        - share_ui: whether to create a public Gradio share link
        - logger: logger passed into the Ask instance
    """
    ask = Ask(logger=logger)

    def toggle_schema_textbox(option):
        # The Pydantic-schema textbox is only relevant in extract mode.
        if option == "extract":
            return gr.update(visible=True)
        else:
            return gr.update(visible=False)

    with gr.Blocks() as demo:
        gr.Markdown("# Ask.py - Web Search-Extract-Summarize")
        gr.Markdown(
            "Search the web with the query and summarize the results. Source code: https://github.com/pengfeng/ask.py"
        )

        with gr.Row():
            with gr.Column():

                query_input = gr.Textbox(label="Query", value=query)
                input_mode_input = gr.Radio(
                    label="Input Mode [search: from search or url, local: from local data]",
                    choices=["search", "local"],
                    value=init_settings.input_mode,
                )
                output_mode_input = gr.Radio(
                    label="Output Mode [answer: simple answer, extract: get structured data]",
                    choices=["answer", "extract"],
                    value=init_settings.output_mode,
                )
                extract_schema_input = gr.Textbox(
                    label="Extract Pydantic Schema",
                    visible=(init_settings.output_mode == "extract"),
                    value=init_settings.extract_schema_str,
                    lines=5,
                    max_lines=20,
                )
                # Show/hide the schema box when the output mode changes.
                output_mode_input.change(
                    fn=toggle_schema_textbox,
                    inputs=output_mode_input,
                    outputs=extract_schema_input,
                )
                date_restrict_input = gr.Number(
                    label="Date Restrict (Optional) [0 or empty means no date limit.]",
                    value=init_settings.date_restrict,
                )
                target_site_input = gr.Textbox(
                    label="Target Sites (Optional) [Empty means searching the whole web.]",
                    value=init_settings.target_site,
                )
                output_language_input = gr.Textbox(
                    label="Output Language (Optional) [Default is English.]",
                    value=init_settings.output_language,
                )
                output_length_input = gr.Number(
                    label="Output Length in words (Optional) [Default is automatically decided by LLM.]",
                    value=init_settings.output_length,
                )
                url_list_input = gr.Textbox(
                    label="URL List (Optional) [When specified, scrape the urls instead of searching the web.]",
                    lines=5,
                    max_lines=20,
                    value="\n".join(init_settings.url_list),
                )

                with gr.Accordion("More Options", open=False):
                    hybrid_search_input = gr.Checkbox(
                        label="Hybrid Search [Use both vector search and full-text search.]",
                        value=init_settings.hybrid_search,
                    )
                    inference_model_name_input = gr.Textbox(
                        label="Inference Model Name",
                        value=init_settings.inference_model_name,
                    )

                submit_button = gr.Button("Submit")

            with gr.Column():
                answer_output = gr.Textbox(label="Answer")
                logs_output = gr.Textbox(label="Logs", lines=10)

        # run_query_gradio is a generator, so the UI streams (answer, logs).
        submit_button.click(
            fn=ask.run_query_gradio,
            inputs=[
                query_input,
                date_restrict_input,
                target_site_input,
                output_language_input,
                output_length_input,
                url_list_input,
                inference_model_name_input,
                hybrid_search_input,
                input_mode_input,
                output_mode_input,
                extract_schema_input,
            ],
            outputs=[answer_output, logs_output],
        )

    demo.queue().launch(share=share_ui)
986
+
987
+
988
@click.command(help="Search web for the query and summarize the results.")
@click.option("--query", "-q", required=False, help="Query to search")
@click.option(
    "--input-mode",
    "-i",
    type=click.Choice(["search", "local"], case_sensitive=False),
    default="search",
    required=False,
    help=(
        "Input mode for the query, default is search. "
        "When using local, files under 'data' folder will be used as input."
    ),
)
@click.option(
    "--output-mode",
    "-o",
    type=click.Choice(["answer", "extract"], case_sensitive=False),
    default="answer",
    required=False,
    help="Output mode for the answer, default is a simple answer",
)
@click.option(
    "--date-restrict",
    "-d",
    type=int,
    required=False,
    default=0,
    help="Restrict search results to a specific date range, default is no restriction",
)
@click.option(
    "--target-site",
    "-s",
    required=False,
    default="",
    help="Restrict search results to a specific site, default is no restriction",
)
@click.option(
    "--output-language",
    required=False,
    default="English",
    help="Output language for the answer",
)
@click.option(
    "--output-length",
    type=int,
    required=False,
    default=0,
    help="Output length for the answer",
)
@click.option(
    "--url-list-file",
    type=str,
    required=False,
    default="",
    show_default=True,
    help="Instead of doing web search, scrape the target URL list and answer the query based on the content",
)
@click.option(
    "--extract-schema-file",
    type=str,
    required=False,
    default="",
    show_default=True,
    help="Pydantic schema for the extract mode",
)
@click.option(
    "--inference-model-name",
    required=False,
    default=None,
    help="Model name to use for inference",
)
@click.option(
    "--vector-search-only",
    is_flag=True,
    help="Do not use hybrid search mode, use vector search only.",
)
@click.option(
    "--run-cli",
    "-c",
    is_flag=True,
    help="Run as a command line tool instead of launching the Gradio UI",
)
@click.option(
    "-e",
    "--env",
    "env",
    default=None,
    required=False,
    help="The environment file to use, absolute path or related to package root.",
)
@click.option(
    "-l",
    "--log-level",
    "log_level",
    default="INFO",
    type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False),
    help="Set the logging level",
    show_default=True,
)
def search_extract_summarize(
    query: str,
    input_mode: str,
    output_mode: str,
    date_restrict: int,
    target_site: str,
    output_language: str,
    output_length: int,
    url_list_file: str,
    extract_schema_file: str,
    inference_model_name: str,
    vector_search_only: bool,
    run_cli: bool,
    env: str,
    log_level: str,
):
    """CLI entry point: build AskSettings from the options, then either run
    the query once (--run-cli) or launch the Gradio UI."""
    # Load the default env file first; a user-specified --env file (below)
    # overrides it.
    load_dotenv(dotenv_path=default_env_file, override=False)
    logger = _get_logger(log_level)

    if env:
        load_dotenv(dotenv_path=env, override=True)

    # Model resolution order: CLI flag -> DEFAULT_INFERENCE_MODEL env var ->
    # hard-coded fallback.
    final_inference_model_name = inference_model_name
    if final_inference_model_name is None:
        final_inference_model_name = os.environ.get("DEFAULT_INFERENCE_MODEL")
        if final_inference_model_name is None:
            final_inference_model_name = "gpt-4o-mini"

    if output_mode == "extract":
        # Extract mode needs a schema and the structured-output parse API,
        # which this code only supports for OpenAI GPT models.
        if not extract_schema_file:
            raise Exception("Extract mode requires the --extract-schema-file argument.")

        if not final_inference_model_name.lower().startswith("gpt"):
            raise Exception("Extract mode requires the OpenAI GPT model.")

    settings = AskSettings(
        date_restrict=date_restrict,
        target_site=target_site,
        output_language=output_language,
        output_length=output_length,
        url_list=_read_url_list(url_list_file),
        inference_model_name=final_inference_model_name,
        hybrid_search=(not vector_search_only),
        input_mode=InputMode(input_mode),
        output_mode=OutputMode(output_mode),
        extract_schema_str=_read_extract_schema_str(extract_schema_file),
    )

    if run_cli:
        if query is None:
            raise Exception("Query is required for the command line mode")
        ask = Ask(logger=logger)

        final_result = ask.run_query(query=query, settings=settings)
        click.echo(final_result)
    else:
        # SHARE_GRADIO_UI=true creates a public share link.
        if os.environ.get("SHARE_GRADIO_UI", "false").lower() == "true":
            share_ui = True
        else:
            share_ui = False
        launch_gradio(
            query=query,
            init_settings=settings,
            share_ui=share_ui,
            logger=logger,
        )
1153
+
1154
+
1155
# Script entry point: dispatch to the Click command.
if __name__ == "__main__":
    search_extract_summarize()
data/README.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7507701ae3d2ee84506216ffca698c59f61bca1df77adb3312919bff1b049cd5
3
+ size 234937
demos/local_files.md ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ```bash
2
+ % python ask.py -i local -c -q "How does Ask.py work?"
3
+ 2024-11-20 10:00:09,335 - INFO - Initializing converter ...
4
+ 2024-11-20 10:00:09,335 - INFO - ✅ Successfully initialized Docling.
5
+ 2024-11-20 10:00:09,335 - INFO - Initializing chunker ...
6
+ 2024-11-20 10:00:09,550 - INFO - ✅ Successfully initialized Chonkie.
7
+ 2024-11-20 10:00:09,850 - INFO - Initializing database ...
8
+ 2024-11-20 10:00:09,933 - INFO - ✅ Successfully initialized DuckDB.
9
+ 2024-11-20 10:00:09,933 - INFO - Processing the local data directory ...
10
+ 2024-11-20 10:00:09,933 - INFO - Processing README.pdf ...
11
+ Fetching 9 files: 100%|████████████████████████████████████████████████████████████████████| 9/9 [00:00<00:00, 11781.75it/s]
12
+ 2024-11-20 10:00:29,629 - INFO - ✅ Finished processing README.pdf.
13
+ 2024-11-20 10:00:29,629 - INFO - Chunking the text ...
14
+ 2024-11-20 10:00:29,639 - INFO - ✅ Generated 2 chunks ...
15
+ 2024-11-20 10:00:29,639 - INFO - Saving 2 chunks to DB ...
16
+ 2024-11-20 10:00:29,681 - INFO - Embedding 1 batches of chunks ...
17
+ 2024-11-20 10:00:30,337 - INFO - ✅ Finished embedding.
18
+ 2024-11-20 10:00:30,423 - INFO - ✅ Created the vector index ...
19
+ 2024-11-20 10:00:30,483 - INFO - ✅ Created the full text search index ...
20
+ 2024-11-20 10:00:30,483 - INFO - ✅ Successfully embedded and saved chunks to DB.
21
+ 2024-11-20 10:00:30,483 - INFO - Querying the vector DB to get context ...
22
+ 2024-11-20 10:00:30,773 - INFO - Running full-text search ...
23
+ 2024-11-20 10:00:30,796 - INFO - ✅ Got 2 matched chunks.
24
+ 2024-11-20 10:00:30,797 - INFO - Running inference with context ...
25
+ 2024-11-20 10:00:34,939 - INFO - ✅ Finished inference API call.
26
+ 2024-11-20 10:00:34,939 - INFO - Generating output ...
27
+ # Answer
28
+
29
+ Ask.py is a Python program designed to implement a search-extract-summarize flow, similar to AI search engines like Perplexity. It can be run through a command line interface or a GradIO user interface and allows for flexibility in controlling output and search behaviors[1].
30
+
31
+ When a query is executed, Ask.py performs the following steps:
32
+
33
+ 1. Searches Google for the top 10 web pages related to the query.
34
+ 2. Crawls and scrapes the content of these pages.
35
+ 3. Breaks down the scraped text into chunks and saves them in a vector database.
36
+ 4. Conducts a vector search with the initial query to identify the top 10 matched text chunks.
37
+ 5. Optionally integrates full-text search results and uses a reranker to refine the results.
38
+ 6. Utilizes the selected chunks as context to query a language model (LLM) to generate a comprehensive answer.
39
+ 7. Outputs the answer along with references to the sources[1].
40
+
41
+ Moreover, the program allows various configurations such as date restrictions, site targeting, output language, and output length. It can also scrape specified URL lists instead of performing a web search, making it highly versatile for search and data extraction tasks[2].
42
+
43
+
44
+ # References
45
+
46
+ [1] file:///Users/feng/work/github/ask.py/data/README.pdf
47
+ [2] file:///Users/feng/work/github/ask.py/data/README.pdf
48
+ ```
demos/run_with_ollama.md ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ```bash
2
+ % python ask.py -e .env.ollama -c -q "How does Ollama work?"
3
+ 2025-01-20 13:36:15,026 - INFO - No SEARCH_API_URL or SEARCH_API_KEY env variable set.
4
+ 2025-01-20 13:36:15,026 - INFO - Using the default proxy at https://svc.leettools.com:8098
5
+ 2025-01-20 13:36:19,395 - INFO - Initializing converter ...
6
+ 2025-01-20 13:36:19,395 - INFO - ✅ Successfully initialized Docling.
7
+ 2025-01-20 13:36:19,395 - INFO - Initializing chunker ...
8
+ 2025-01-20 13:36:19,614 - INFO - ✅ Successfully initialized Chonkie.
9
+ 2025-01-20 13:36:19,917 - INFO - Initializing database ...
10
+ 2025-01-20 13:36:19,992 - INFO - ✅ Successfully initialized DuckDB.
11
+ 2025-01-20 13:36:19,992 - INFO - Searching the web ...
12
+ 2025-01-20 13:36:20,653 - INFO - ✅ Found 10 links for query: How does Ollama work?
13
+ 2025-01-20 13:36:20,653 - INFO - Scraping the URLs ...
14
+ 2025-01-20 13:36:20,653 - INFO - Scraping https://www.reddit.com/r/ollama/comments/197thp1/does_anyone_know_how_ollama_works_under_the_hood/ ...
15
+ 2025-01-20 13:36:20,654 - INFO - Scraping https://medium.com/@mauryaanoop3/ollama-a-deep-dive-into-running-large-language-models-locally-part-1-0a4b70b30982 ...
16
+ 2025-01-20 13:36:20,655 - INFO - Scraping https://www.reddit.com/r/LocalLLaMA/comments/1dhyxq8/why_use_ollama/ ...
17
+ 2025-01-20 13:36:20,656 - INFO - Scraping https://github.com/jmorganca/ollama/issues/1014 ...
18
+ 2025-01-20 13:36:20,657 - INFO - Scraping https://www.listedai.co/ai/ollama ...
19
+ 2025-01-20 13:36:20,657 - INFO - Scraping https://www.andreagrandi.it/posts/ollama-running-llm-locally/ ...
20
+ 2025-01-20 13:36:20,658 - INFO - Scraping https://itsfoss.com/ollama/ ...
21
+ 2025-01-20 13:36:20,659 - INFO - Scraping https://community.n8n.io/t/ollama-embedding-does-not-accept-the-model-but-using-it-with-http-request-works/64457 ...
22
+ 2025-01-20 13:36:20,659 - INFO - Scraping https://community.frame.work/t/ollama-framework-13-amd/53848 ...
23
+ 2025-01-20 13:36:20,660 - INFO - Scraping https://abvijaykumar.medium.com/ollama-brings-runtime-to-serve-llms-everywhere-8a23b6f6a1b4 ...
24
+ 2025-01-20 13:36:20,802 - INFO - ✅ Successfully scraped https://abvijaykumar.medium.com/ollama-brings-runtime-to-serve-llms-everywhere-8a23b6f6a1b4 with length: 6408
25
+ 2025-01-20 13:36:20,861 - INFO - ✅ Successfully scraped https://www.andreagrandi.it/posts/ollama-running-llm-locally/ with length: 10535
26
+ 2025-01-20 13:36:20,891 - INFO - ✅ Successfully scraped https://itsfoss.com/ollama/ with length: 8772
27
+ 2025-01-20 13:36:20,969 - INFO - ✅ Successfully scraped https://community.frame.work/t/ollama-framework-13-amd/53848 with length: 4434
28
+ 2025-01-20 13:36:21,109 - WARNING - Body text too short for url: https://github.com/jmorganca/ollama/issues/1014, length: 9
29
+ 2025-01-20 13:36:21,370 - INFO - ✅ Successfully scraped https://www.reddit.com/r/ollama/comments/197thp1/does_anyone_know_how_ollama_works_under_the_hood/ with length: 2116
30
+ 2025-01-20 13:36:21,378 - INFO - ✅ Successfully scraped https://medium.com/@mauryaanoop3/ollama-a-deep-dive-into-running-large-language-models-locally-part-1-0a4b70b30982 with length: 6594
31
+ 2025-01-20 13:36:21,432 - INFO - ✅ Successfully scraped https://www.reddit.com/r/LocalLLaMA/comments/1dhyxq8/why_use_ollama/ with length: 2304
32
+ 2025-01-20 13:36:21,734 - INFO - ✅ Successfully scraped https://community.n8n.io/t/ollama-embedding-does-not-accept-the-model-but-using-it-with-http-request-works/64457 with length: 2875
33
+ 2025-01-20 13:36:21,776 - INFO - ✅ Successfully scraped https://www.listedai.co/ai/ollama with length: 5516
34
+ 2025-01-20 13:36:21,776 - INFO - ✅ Scraped 9 URLs.
35
+ 2025-01-20 13:36:21,776 - INFO - Chunking the text ...
36
+ 2025-01-20 13:36:21,784 - INFO - ✅ Generated 18 chunks ...
37
+ 2025-01-20 13:36:21,784 - INFO - Saving 18 chunks to DB ...
38
+ 2025-01-20 13:36:21,807 - INFO - Embedding 9 batches of chunks ...
39
+ 2025-01-20 13:36:40,752 - INFO - ✅ Finished embedding.
40
+ 2025-01-20 13:36:40,930 - INFO - ✅ Created the vector index ...
41
+ 2025-01-20 13:36:41,010 - INFO - ✅ Created the full text search index ...
42
+ 2025-01-20 13:36:41,010 - INFO - ✅ Successfully embedded and saved chunks to DB.
43
+ 2025-01-20 13:36:41,011 - INFO - Querying the vector DB to get context ...
44
+ 2025-01-20 13:36:41,091 - INFO - Running full-text search ...
45
+ 2025-01-20 13:36:41,118 - INFO - ✅ Got 10 matched chunks.
46
+ 2025-01-20 13:36:41,118 - INFO - Running inference with context ...
47
+ 2025-01-20 13:37:59,233 - INFO - ✅ Finished inference API call.
48
+ 2025-01-20 13:37:59,234 - INFO - Generating output ...
49
+ # Answer
50
+
51
+ Here is the reformatted output:
52
+
53
+ **Conclusion**
54
+
55
+ Though there are plenty of similar tools, Ollama has become the most popular tool to run LLMs locally. The ease of use in installing different LLMs quickly makes it ideal for beginners who want to use local AI.
56
+
57
+ **Dealing with Issues**
58
+
59
+ If you still have some questions, please feel free to ask in the comment section.
60
+
61
+ **AI Tools**
62
+
63
+ Here are some additional resources:
64
+
65
+ * 20 Jan 2025 7 Raspberry Pi-Based Laptops and Tablets for Tinkerers
66
+ * 17 Jan 2025 Adding Grouped Items in Waybar
67
+ * Become a Better Linux User With the FOSS Weekly Newsletter, you learn useful Linux tips, discover applications, explore new distros and stay updated with the latest from Linux world,
68
+ * I Ran the Famed SmolLM on Raspberry Pi TEN AI: Open Source Framework for Quickly Creating Real-Time Multimodal AI Agents
69
+
70
+ # References
71
+
72
+ [1] https://www.reddit.com/r/ollama/comments/197thp1/does_anyone_know_how_ollama_works_under_the_hood/
73
+ [2] https://community.frame.work/t/ollama-framework-13-amd/53848
74
+ [3] https://www.reddit.com/r/LocalLLaMA/comments/1dhyxq8/why_use_ollama/
75
+ [4] https://itsfoss.com/ollama/
76
+ [5] https://abvijaykumar.medium.com/ollama-brings-runtime-to-serve-llms-everywhere-8a23b6f6a1b4
77
+ [6] https://www.listedai.co/ai/ollama
78
+ [7] https://community.n8n.io/t/ollama-embedding-does-not-accept-the-model-but-using-it-with-http-request-works/64457
79
+ [8] https://medium.com/@mauryaanoop3/ollama-a-deep-dive-into-running-large-language-models-locally-part-1-0a4b70b30982
80
+ [9] https://itsfoss.com/ollama/
81
+ [10] https://itsfoss.com/ollama
82
+ ```
demos/search_and_answer.md ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ```bash
2
+ % python ask.py -c -q "Why do we need agentic RAG even if we have ChatGPT?"
3
+ 2024-11-20 10:03:49,810 - INFO - Initializing converter ...
4
+ 2024-11-20 10:03:49,810 - INFO - ✅ Successfully initialized Docling.
5
+ 2024-11-20 10:03:49,810 - INFO - Initializing chunker ...
6
+ 2024-11-20 10:03:50,052 - INFO - ✅ Successfully initialized Chonkie.
7
+ 2024-11-20 10:03:50,414 - INFO - Initializing database ...
8
+ 2024-11-20 10:03:50,544 - INFO - ✅ Successfully initialized DuckDB.
9
+ 2024-11-20 10:03:50,545 - INFO - Searching the web ...
10
+ 2024-11-20 10:03:51,239 - INFO - ✅ Found 10 links for query: Why do we need agentic RAG even if we have ChatGPT?
11
+ 2024-11-20 10:03:51,239 - INFO - Scraping the URLs ...
12
+ 2024-11-20 10:03:51,239 - INFO - Scraping https://community.openai.com/t/how-to-use-rag-properly-and-what-types-of-query-it-is-good-at/658204 ...
13
+ 2024-11-20 10:03:51,240 - INFO - Scraping https://www.reddit.com/r/LangChain/comments/1ey94rs/is_rag_still_a_thing/ ...
14
+ 2024-11-20 10:03:51,242 - INFO - Scraping https://community.openai.com/t/prompt-engineering-for-rag/621495 ...
15
+ 2024-11-20 10:03:51,242 - INFO - Scraping https://www.linkedin.com/posts/elijahbutler_can-you-use-chat-gpt-as-a-data-analyst-activity-7227666801688461312-qk6v ...
16
+ 2024-11-20 10:03:51,243 - INFO - Scraping https://www.reddit.com/r/ChatGPTCoding/comments/1cft751/my_experience_with_github_copilot_vs_cursor/ ...
17
+ 2024-11-20 10:03:51,244 - INFO - Scraping https://www.ben-evans.com/benedictevans/2024/6/8/building-ai-products ...
18
+ 2024-11-20 10:03:51,244 - INFO - Scraping https://news.ycombinator.com/item?id=40739982 ...
19
+ 2024-11-20 10:03:51,245 - INFO - Scraping https://www.linkedin.com/posts/andrewyng_github-andrewyngtranslation-agent-activity-7206347897938866176-5tDJ ...
20
+ 2024-11-20 10:03:51,247 - INFO - Scraping https://medium.com/@sandyshah1990/starting-to-learn-agentic-rag-e7ec916c83a2 ...
21
+ 2024-11-20 10:03:51,248 - INFO - Scraping https://www.linkedin.com/posts/kurtcagle_agentic-rag-personalizing-and-optimizing-activity-7198097129993613312-z7Sm ...
22
+ 2024-11-20 10:03:51,836 - INFO - ✅ Successfully scraped https://www.ben-evans.com/benedictevans/2024/6/8/building-ai-products with length: 8824
23
+ 2024-11-20 10:03:51,839 - INFO - ✅ Successfully scraped https://medium.com/@sandyshah1990/starting-to-learn-agentic-rag-e7ec916c83a2 with length: 18260
24
+ 2024-11-20 10:03:51,852 - INFO - ✅ Successfully scraped https://community.openai.com/t/how-to-use-rag-properly-and-what-types-of-query-it-is-good-at/658204 with length: 9895
25
+ 2024-11-20 10:03:51,869 - INFO - ✅ Successfully scraped https://community.openai.com/t/prompt-engineering-for-rag/621495 with length: 21898
26
+ 2024-11-20 10:03:52,038 - INFO - ✅ Successfully scraped https://news.ycombinator.com/item?id=40739982 with length: 122350
27
+ 2024-11-20 10:03:52,227 - INFO - ✅ Successfully scraped https://www.linkedin.com/posts/andrewyng_github-andrewyngtranslation-agent-activity-7206347897938866176-5tDJ with length: 35845
28
+ 2024-11-20 10:03:52,425 - INFO - ✅ Successfully scraped https://www.linkedin.com/posts/kurtcagle_agentic-rag-personalizing-and-optimizing-activity-7198097129993613312-z7Sm with length: 24524
29
+ 2024-11-20 10:03:52,480 - INFO - ✅ Successfully scraped https://www.linkedin.com/posts/elijahbutler_can-you-use-chat-gpt-as-a-data-analyst-activity-7227666801688461312-qk6v with length: 25621
30
+ 2024-11-20 10:03:52,949 - INFO - ✅ Successfully scraped https://www.reddit.com/r/ChatGPTCoding/comments/1cft751/my_experience_with_github_copilot_vs_cursor/ with length: 5138
31
+ 2024-11-20 10:03:52,996 - INFO - ✅ Successfully scraped https://www.reddit.com/r/LangChain/comments/1ey94rs/is_rag_still_a_thing/ with length: 2486
32
+ 2024-11-20 10:03:52,996 - INFO - ✅ Scraped 10 URLs.
33
+ 2024-11-20 10:03:52,996 - INFO - Chunking the text ...
34
+ 2024-11-20 10:03:53,044 - INFO - ✅ Generated 75 chunks ...
35
+ 2024-11-20 10:03:53,044 - INFO - Saving 75 chunks to DB ...
36
+ 2024-11-20 10:03:53,065 - INFO - Embedding 10 batches of chunks ...
37
+ 2024-11-20 10:03:54,563 - INFO - ✅ Finished embedding.
38
+ 2024-11-20 10:03:55,583 - INFO - ✅ Created the vector index ...
39
+ 2024-11-20 10:03:55,677 - INFO - ✅ Created the full text search index ...
40
+ 2024-11-20 10:03:55,679 - INFO - ✅ Successfully embedded and saved chunks to DB.
41
+ 2024-11-20 10:03:55,679 - INFO - Querying the vector DB to get context ...
42
+ 2024-11-20 10:03:56,092 - INFO - Running full-text search ...
43
+ 2024-11-20 10:03:56,118 - INFO - ✅ Got 15 matched chunks.
44
+ 2024-11-20 10:03:56,118 - INFO - Running inference with context ...
45
+ 2024-11-20 10:04:00,968 - INFO - ✅ Finished inference API call.
46
+ 2024-11-20 10:04:00,969 - INFO - Generating output ...
47
+ # Answer
48
+
49
+ Agentic RAG (Retrieval-Augmented Generation) is necessary even with the existence of ChatGPT due to its multi-faceted capabilities that enhance the overall processing and retrieval of information. Specifically, Agentic RAG employs multiple agents that can manage retrieval tasks, document comparisons, and even perform specific operations like calculations, which are not inherently available in a single model like ChatGPT. This allows for a more streamlined and efficient process when addressing complex queries that require synthesis from various data points, ensuring that no critical context is lost during retrieval and generation processes[1][4]. Additionally, RAG's framework allows for greater flexibility and precision in handling varied types of queries, especially those that require comparative analysis or handling large volumes of data that exceed typical model limitations[2][5][6]. Furthermore, it enables the use of specialized agents that can focus on unique tasks, making the whole system more dynamic and capable of tackling intricate demands in real-time applications[4][6].
50
+
51
+ In short, while ChatGPT offers robust conversational capabilities, the agentic approach of RAG significantly broadens the scope and effectiveness of information processing for complex tasks.
52
+
53
+
54
+ # References
55
+
56
+ [1] https://community.openai.com/t/how-to-use-rag-properly-and-what-types-of-query-it-is-good-at/658204
57
+ [2] https://community.openai.com/t/how-to-use-rag-properly-and-what-types-of-query-it-is-good-at/658204
58
+ [3] https://community.openai.com/t/prompt-engineering-for-rag/621495
59
+ [4] https://community.openai.com/t/how-to-use-rag-properly-and-what-types-of-query-it-is-good-at/658204
60
+ [5] https://news.ycombinator.com/item?id=40739982
61
+ [6] https://www.linkedin.com/posts/elijahbutler_can-you-use-chat-gpt-as-a-data-analyst-activity-7227666801688461312-qk6v
62
+ [7] https://community.openai.com/t/prompt-engineering-for-rag/621495
63
+ [8] https://news.ycombinator.com/item?id=40739982
64
+ [9] https://www.linkedin.com/posts/elijahbutler_can-you-use-chat-gpt-as-a-data-analyst-activity-7227666801688461312-qk6v
65
+ [10] https://www.linkedin.com/posts/elijahbutler_can-you-use-chat-gpt-as-a-data-analyst-activity-7227666801688461312-qk6v
66
+ [11] https://community.openai.com/t/prompt-engineering-for-rag/621495
67
+ [12] https://news.ycombinator.com/item?id=40739982
68
+ [13] https://news.ycombinator.com/item?id=40739982
69
+ [14] https://news.ycombinator.com/item?id=40739982
70
+ [15] https://news.ycombinator.com/item?id=40739982
71
+ ```
demos/search_and_extract.md ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ```bash
2
+ python ask.py -c -q "LLM Gen-AI Startups" -o extract --extract-schema-file instructions/extract_example.txt
3
+ 2024-11-20 10:06:34,308 - INFO - Initializing converter ...
4
+ 2024-11-20 10:06:34,308 - INFO - ✅ Successfully initialized Docling.
5
+ 2024-11-20 10:06:34,308 - INFO - Initializing chunker ...
6
+ 2024-11-20 10:06:34,546 - INFO - ✅ Successfully initialized Chonkie.
7
+ 2024-11-20 10:06:34,902 - INFO - Initializing database ...
8
+ 2024-11-20 10:06:35,047 - INFO - ✅ Successfully initialized DuckDB.
9
+ 2024-11-20 10:06:35,047 - INFO - Searching the web ...
10
+ 2024-11-20 10:06:35,409 - INFO - ✅ Found 10 links for query: LLM Gen-AI Startups
11
+ 2024-11-20 10:06:35,409 - INFO - Scraping the URLs ...
12
+ 2024-11-20 10:06:35,409 - INFO - Scraping https://www.ycombinator.com/companies/industry/generative-ai ...
13
+ 2024-11-20 10:06:35,409 - INFO - Scraping https://app.dealroom.co/lists/33530 ...
14
+ 2024-11-20 10:06:35,410 - INFO - Scraping https://explodingtopics.com/blog/generative-ai-startups ...
15
+ 2024-11-20 10:06:35,410 - INFO - Scraping https://www.reddit.com/r/Startup_Ideas/comments/1djstai/thoughts_on_llm_based_startups/ ...
16
+ 2024-11-20 10:06:35,411 - INFO - Scraping https://www.linkedin.com/pulse/20-gen-ai-healthcare-startups-shaping-future-recap-from-renee-yao-q7lkc ...
17
+ 2024-11-20 10:06:35,413 - INFO - Scraping https://www.reddit.com/r/learnprogramming/comments/1e0gzbo/are_most_ai_startups_these_days_just_openai/ ...
18
+ 2024-11-20 10:06:35,414 - INFO - Scraping https://a16z.com/ai/ ...
19
+ 2024-11-20 10:06:35,415 - INFO - Scraping https://praful-krishna.medium.com/thinking-of-an-llm-based-project-or-startup-dont-dd92c1a54237 ...
20
+ 2024-11-20 10:06:35,415 - INFO - Scraping https://medium.com/point-nine-news/where-are-the-opportunities-for-new-startups-in-generative-ai-f48068b5f8f9 ...
21
+ 2024-11-20 10:06:35,416 - INFO - Scraping https://www.eweek.com/artificial-intelligence/generative-ai-startups/ ...
22
+ 2024-11-20 10:06:35,636 - INFO - ✅ Successfully scraped https://explodingtopics.com/blog/generative-ai-startups with length: 17632
23
+ 2024-11-20 10:06:35,992 - INFO - ✅ Successfully scraped https://praful-krishna.medium.com/thinking-of-an-llm-based-project-or-startup-dont-dd92c1a54237 with length: 8612
24
+ 2024-11-20 10:06:36,133 - INFO - ✅ Successfully scraped https://medium.com/point-nine-news/where-are-the-opportunities-for-new-startups-in-generative-ai-f48068b5f8f9 with length: 3649
25
+ 2024-11-20 10:06:36,608 - INFO - ✅ Successfully scraped https://www.linkedin.com/pulse/20-gen-ai-healthcare-startups-shaping-future-recap-from-renee-yao-q7lkc with length: 13736
26
+ 2024-11-20 10:06:36,675 - INFO - ✅ Successfully scraped https://app.dealroom.co/lists/33530 with length: 208
27
+ 2024-11-20 10:06:36,934 - INFO - ✅ Successfully scraped https://a16z.com/ai/ with length: 14737
28
+ 2024-11-20 10:06:37,217 - INFO - ✅ Successfully scraped https://www.reddit.com/r/learnprogramming/comments/1e0gzbo/are_most_ai_startups_these_days_just_openai/ with length: 2069
29
+ 2024-11-20 10:06:37,314 - INFO - ✅ Successfully scraped https://www.reddit.com/r/Startup_Ideas/comments/1djstai/thoughts_on_llm_based_startups/ with length: 3112
30
+ 2024-11-20 10:06:37,556 - INFO - ✅ Successfully scraped https://www.ycombinator.com/companies/industry/generative-ai with length: 53344
31
+ 2024-11-20 10:06:37,582 - INFO - ✅ Successfully scraped https://www.eweek.com/artificial-intelligence/generative-ai-startups/ with length: 69127
32
+ 2024-11-20 10:06:37,582 - INFO - ✅ Scraped 10 URLs.
33
+ 2024-11-20 10:06:37,582 - INFO - Extracting structured data ...
34
+ 2024-11-20 10:06:59,368 - INFO - ✅ Finished inference API call. Extracted 99 items from https://www.ycombinator.com/companies/industry/generative-ai.
35
+ 2024-11-20 10:06:59,869 - INFO - ✅ Finished inference API call. Extracted 0 items from https://app.dealroom.co/lists/33530.
36
+ 2024-11-20 10:07:07,198 - INFO - ✅ Finished inference API call. Extracted 33 items from https://explodingtopics.com/blog/generative-ai-startups.
37
+ 2024-11-20 10:07:08,094 - INFO - ✅ Finished inference API call. Extracted 1 items from https://www.reddit.com/r/Startup_Ideas/comments/1djstai/thoughts_on_llm_based_startups/.
38
+ 2024-11-20 10:07:12,658 - INFO - ✅ Finished inference API call. Extracted 20 items from https://www.linkedin.com/pulse/20-gen-ai-healthcare-startups-shaping-future-recap-from-renee-yao-q7lkc.
39
+ 2024-11-20 10:07:13,667 - INFO - ✅ Finished inference API call. Extracted 0 items from https://www.reddit.com/r/learnprogramming/comments/1e0gzbo/are_most_ai_startups_these_days_just_openai/.
40
+ 2024-11-20 10:07:15,321 - INFO - ✅ Finished inference API call. Extracted 6 items from https://a16z.com/ai/.
41
+ 2024-11-20 10:07:17,139 - INFO - ✅ Finished inference API call. Extracted 3 items from https://praful-krishna.medium.com/thinking-of-an-llm-based-project-or-startup-dont-dd92c1a54237.
42
+ 2024-11-20 10:07:19,724 - INFO - ✅ Finished inference API call. Extracted 7 items from https://medium.com/point-nine-news/where-are-the-opportunities-for-new-startups-in-generative-ai-f48068b5f8f9.
43
+ 2024-11-20 10:07:39,284 - INFO - ✅ Finished inference API call. Extracted 75 items from https://www.eweek.com/artificial-intelligence/generative-ai-startups/.
44
+ 2024-11-20 10:07:39,284 - INFO - ✅ Finished extraction from all urls.
45
+ 2024-11-20 10:07:39,284 - INFO - Generating output ...
46
+ name,description,SourceURL
47
+ Humanloop,"Humanloop is the LLM evals platform for enterprises. Teams at Gusto, Vanta and Duolingo use Humanloop to ship reliable AI products. We enable you to adopt best practices for prompt management, evaluation and observability.",https://www.ycombinator.com/companies/industry/generative-ai
48
+ Truewind,"Truewind is AI-powered bookkeeping and finance software for startups. Using GPT-3, Truewind captures the business context that only founders have, making accounting easier and more accurate.",https://www.ycombinator.com/companies/industry/generative-ai
49
+ Shepherd,"Shepherd is a Learning assistant for schools to provide to their students. Shepherd seamlessly combines AI-enabled self-study, affordable tutoring, peer collaboration, and analytics for a personalized learning experience.",https://www.ycombinator.com/companies/industry/generative-ai
50
+ Remy,"Use Remy to discover upcoming engineering work, perform automatic triage and speed up your design reviews.",https://www.ycombinator.com/companies/industry/generative-ai
51
+ Hyperbound,Hyperbound is a simulated AI sales roleplay platform that turns ICP descriptions into interactive AI buyers in less than 2 minutes.,https://www.ycombinator.com/companies/industry/generative-ai
52
+ AI.Fashion,AI.Fashion is the AI creative suite for the fashion industry - modernizing the traditional design and go to market fashion processes with our advanced AI platform and design tools.,https://www.ycombinator.com/companies/industry/generative-ai
53
+ Infobot,"By using LLMs to generate news content, we reduce the cost of generating an article by over 1000x.",https://www.ycombinator.com/companies/industry/generative-ai
54
+ Magic Loops,Magic Loops are the fastest way to automate (almost) anything by combining generative AI with code.,https://www.ycombinator.com/companies/industry/generative-ai
55
+ Humanlike,"A better alternative to outsourcing accounts payable and receivable, using human-like AI to process invoices more efficiently.",https://www.ycombinator.com/companies/industry/generative-ai
56
+ Atla,"Atla helps developers find AI mistakes at scale, so they can build more reliable GenAI applications.",https://www.ycombinator.com/companies/industry/generative-ai
57
+ Contour,"Contour is building next-generation quality assurance to free engineering time and test products, end-to-end.",https://www.ycombinator.com/companies/industry/generative-ai
58
+ Mandel AI,Mandel surfaces supply chain disruptions and supplier updates with email AI.,https://www.ycombinator.com/companies/industry/generative-ai
59
+ Aqua Voice,Aqua is a voice-driven text editor that lets you speak naturally and writes down what you meant.,https://www.ycombinator.com/companies/industry/generative-ai
60
+ Sapling.ai,Sapling offers an API and SDK to help businesses integrate language models into their applications.,https://www.ycombinator.com/companies/industry/generative-ai
61
+ askLio,"askLio builds AI Copilots to help procurement teams at enterprises, reducing the procurement process from weeks to hours.",https://www.ycombinator.com/companies/industry/generative-ai
62
+ Marblism,"Marblism helps user describe their app, generating the database, back-end, and front-end.",https://www.ycombinator.com/companies/industry/generative-ai
63
+ Lumona,Lumona is an AI-enabled search engine featuring perspectives from social media to help understand search results.,https://www.ycombinator.com/companies/industry/generative-ai
64
+ DraftWise,DraftWise harnesses the power of AI for drafting and negotiation in the legal industry.,https://www.ycombinator.com/companies/industry/generative-ai
65
+ Montrey AI,Montrey AI helps companies analyze qualitative feedback and user engagement data.,https://www.ycombinator.com/companies/industry/generative-ai
66
+ Synch,Your Sales and Sales Ops team in a unified platform.,https://www.ycombinator.com/companies/industry/generative-ai
67
+ Tegon,Tegon is an open-source issue tracking tool designed for engineering teams.,https://www.ycombinator.com/companies/industry/generative-ai
68
+ Empower,Empower is a developer platform for fine-tuned LLMs.,https://www.ycombinator.com/companies/industry/generative-ai
69
+ Spine AI,Spine AI effectively translates business context and data schema into an AI analyst.,https://www.ycombinator.com/companies/industry/generative-ai
70
+ TruthSuite,TruthSuite provides a platform to enhance due diligence and research processes.,https://www.ycombinator.com/companies/industry/generative-ai
71
+ Senso,Senso is building an AI-powered knowledge base for customer support.,https://www.ycombinator.com/companies/industry/generative-ai
72
+ Parea AI,Parea AI is the essential developer platform for debugging and monitoring LLM applications.,https://www.ycombinator.com/companies/industry/generative-ai
73
+ Shasta Health,Shasta Health enables physical therapists to go independent using AI agents.,https://www.ycombinator.com/companies/industry/generative-ai
74
+ Arcimus,Arcimus uses LLMs to automate insurance premium audits.,https://www.ycombinator.com/companies/industry/generative-ai
75
+ Tavus,"At Tavus, we're building the human layer of AI for natural interaction.",https://www.ycombinator.com/companies/industry/generative-ai
76
+ Leena AI,"Leena AI answers employee questions automatically, streamlining HR processes.",https://www.ycombinator.com/companies/industry/generative-ai
77
+ Vocode,Vocode is an open-source voice AI platform.,https://www.ycombinator.com/companies/industry/generative-ai
78
+ OfOne,OfOne builds software to automate order taking at fast-food drive-thrus.,https://www.ycombinator.com/companies/industry/generative-ai
79
+ Spellbrush,Spellbrush is the world's leading generative AI studio.,https://www.ycombinator.com/companies/industry/generative-ai
80
+ VetRec,VetRec automates the process of taking clinical notes for veterinarians.,https://www.ycombinator.com/companies/industry/generative-ai
81
+ Orangewood Labs,Orangewood Labs creates affordable AI-powered industrial robotic arms.,https://www.ycombinator.com/companies/industry/generative-ai
82
+ Credal.ai,Credal.ai allows any employee to build AI Assistants for enterprise.,https://www.ycombinator.com/companies/industry/generative-ai
83
+ Diffuse Bio,Diffuse is building generative AI for protein design.,https://www.ycombinator.com/companies/industry/generative-ai
84
+ RenderNet,RenderNet transforms imaginative concepts into high-quality images.,https://www.ycombinator.com/companies/industry/generative-ai
85
+ Reworkd,Reworkd works on multimodal LLM agents to extract web data at scale.,https://www.ycombinator.com/companies/industry/generative-ai
86
+ Maven Bio,Maven Bio empowers business development teams with AI for BioPharma.,https://www.ycombinator.com/companies/industry/generative-ai
87
+ Mathos,Mathos AI is the leading AI math solver for educational productivity.,https://www.ycombinator.com/companies/industry/generative-ai
88
+ Traceloop,Traceloop monitors the quality of LLM applications in production.,https://www.ycombinator.com/companies/industry/generative-ai
89
+ MediSearch,MediSearch provides direct answers to medical questions.,https://www.ycombinator.com/companies/industry/generative-ai
90
+ Syncly,Syncly helps product teams analyze communications to prevent churn.,https://www.ycombinator.com/companies/industry/generative-ai
91
+ Magic Patterns,Magic Patterns helps software teams prototype product ideas.,https://www.ycombinator.com/companies/industry/generative-ai
92
+ Glade,Glade uses AI to create a new genre of video games.,https://www.ycombinator.com/companies/industry/generative-ai
93
+ Pyq AI,Pyq AI builds automations to streamline information extraction.,https://www.ycombinator.com/companies/industry/generative-ai
94
+ Indexical,Indexical is a developer tool for SaaS and B2B.,https://www.ycombinator.com/companies/industry/generative-ai
95
+ Kobalt Labs,Kobalt automates manual risk and compliance operations.,https://www.ycombinator.com/companies/industry/generative-ai
96
+ Khoj,Khoj is an open-source AI application for personalized assistance.,https://www.ycombinator.com/companies/industry/generative-ai
97
+ Flint,Flint is an AI platform for K-12 education.,https://www.ycombinator.com/companies/industry/generative-ai
98
+ Reforged Labs,Reforged Labs launches AI-powered video creation service.,https://www.ycombinator.com/companies/industry/generative-ai
99
+ Unsloth AI,Unsloth helps builders create custom models better and faster.,https://www.ycombinator.com/companies/industry/generative-ai
100
+ Rosebud AI,Rosebud builds the AI Roblox for easy game creation.,https://www.ycombinator.com/companies/industry/generative-ai
101
+ VectorShift,VectorShift is an AI automations platform for knowledge generation.,https://www.ycombinator.com/companies/industry/generative-ai
102
+ Inari,Inari surfaces customer insights from feedback automatically.,https://www.ycombinator.com/companies/industry/generative-ai
103
+ VideoGen,"VideoGen makes it easy to create professional, copyright-free videos.",https://www.ycombinator.com/companies/industry/generative-ai
104
+ Infeedo AI,Infeedo AI helps enhance employee experience with conversational AI.,https://www.ycombinator.com/companies/industry/generative-ai
105
+ sudocode,sudocode lets users code in plain English.,https://www.ycombinator.com/companies/industry/generative-ai
106
+ ideate.xyz,ideate.xyz is a graphics design as API platform.,https://www.ycombinator.com/companies/industry/generative-ai
107
+ PlayHT,Play is a Voice AI company specializing in conversational voice models.,https://www.ycombinator.com/companies/industry/generative-ai
108
+ Inventive AI,Inventive is an AI-powered platform for managing RFP & questionnaire responses.,https://www.ycombinator.com/companies/industry/generative-ai
109
+ Proxis,Proxis is dedicated to LLM distillation unlock production ready models.,https://www.ycombinator.com/companies/industry/generative-ai
110
+ Zuni,Zuni is an AI productivity tool.,https://www.ycombinator.com/companies/industry/generative-ai
111
+ reworks,reworks helps integrate agentic AI companies with external software.,https://www.ycombinator.com/companies/industry/generative-ai
112
+ Kalam Labs,Kalam Labs is creating a space for kids to participate in ambitious space missions.,https://www.ycombinator.com/companies/industry/generative-ai
113
+ Passage,Passage is a co-pilot for the customs brokering space.,https://www.ycombinator.com/companies/industry/generative-ai
114
+ camfer,camfer helps mechanical engineers collaborate on design tasks.,https://www.ycombinator.com/companies/industry/generative-ai
115
+ Pibit.ai,Pibit transforms loss run files into comprehensive reports.,https://www.ycombinator.com/companies/industry/generative-ai
116
+ Merse,Merse builds visual stories like comics but with voices and sound effects.,https://www.ycombinator.com/companies/industry/generative-ai
117
+ Letterdrop,Letterdrop helps understand what content drives revenue.,https://www.ycombinator.com/companies/industry/generative-ai
118
+ Pulse AI,Pulse automates procurement with AI.,https://www.ycombinator.com/companies/industry/generative-ai
119
+ Tara AI,Tara AI measures and improves engineering efficiency.,https://www.ycombinator.com/companies/industry/generative-ai
120
+ Jasper.ai,Jasper is an AI content platform for creators and companies.,https://www.ycombinator.com/companies/industry/generative-ai
121
+ Ego,Ego is a generative AI-powered simulation engine for creators.,https://www.ycombinator.com/companies/industry/generative-ai
122
+ Sameday,Sameday's AI Sales Agent answers calls for home service businesses.,https://www.ycombinator.com/companies/industry/generative-ai
123
+ dmodel,dmodel lets companies manipulate AI model thoughts in real time.,https://www.ycombinator.com/companies/industry/generative-ai
124
+ Playground,Playground combines AI research and product design.,https://www.ycombinator.com/companies/industry/generative-ai
125
+ Hypotenuse AI,Hypotenuse turns keywords into blog articles and copywriting.,https://www.ycombinator.com/companies/industry/generative-ai
126
+ Simplify,Simplify is re-imagining the job-searching process.,https://www.ycombinator.com/companies/industry/generative-ai
127
+ Mem0,Mem0 provides a memory layer for LLM applications.,https://www.ycombinator.com/companies/industry/generative-ai
128
+ Benchify,Benchify is a code review tool that tests code rigorously.,https://www.ycombinator.com/companies/industry/generative-ai
129
+ Saturn,Saturn is an AI-powered operating system for wealth management.,https://www.ycombinator.com/companies/industry/generative-ai
130
+ MagiCode,MagiCode automates testing code in the frontend.,https://www.ycombinator.com/companies/industry/generative-ai
131
+ Redouble AI,Redouble AI scales human-in-the-loop for AI workflows.,https://www.ycombinator.com/companies/industry/generative-ai
132
+ Ankr Health,Ankr uses generative AI to recreate clinic functions.,https://www.ycombinator.com/companies/industry/generative-ai
133
+ innkeeper,innkeeper provides dynamic pricing and other automations for hotels.,https://www.ycombinator.com/companies/industry/generative-ai
134
+ AlphaWatch AI,AlphaWatch AI improves research for hedge funds using LLMs.,https://www.ycombinator.com/companies/industry/generative-ai
135
+ D-ID,D-ID generates realistic high-quality AI personas using deep-learning.,https://www.ycombinator.com/companies/industry/generative-ai
136
+ iollo,iollo is an at-home metabolomics test for health optimization.,https://www.ycombinator.com/companies/industry/generative-ai
137
+ Unify,Unify allows building evals for LLMs for production.,https://www.ycombinator.com/companies/industry/generative-ai
138
+ Activeloop,Activeloop provides APIs for collaborative AI datasets.,https://www.ycombinator.com/companies/industry/generative-ai
139
+ Moonvalley,Moonvalley is building a creative studio powered by generative AI.,https://www.ycombinator.com/companies/industry/generative-ai
140
+ Kura AI,Kura is SOTA for giving AI agents the tools for website interactions.,https://www.ycombinator.com/companies/industry/generative-ai
141
+ MixerBox,MixerBox helps people live easier through mobile apps.,https://www.ycombinator.com/companies/industry/generative-ai
142
+ SchemeFlow,SchemeFlow automates approvals for construction projects.,https://www.ycombinator.com/companies/industry/generative-ai
143
+ ZOKO,Zoko facilitates business communication on WhatsApp.,https://www.ycombinator.com/companies/industry/generative-ai
144
+ Praxos,Praxos allows insurance professionals to automate their operations.,https://www.ycombinator.com/companies/industry/generative-ai
145
+ Odo,Odo helps companies win government contracts using AI.,https://www.ycombinator.com/companies/industry/generative-ai
146
+ Cohere,Cohere is an AI startup that builds multilingual LLMs for enterprise businesses to streamline tasks.,https://explodingtopics.com/blog/generative-ai-startups
147
+ Hugging Face,"Hugging Face is a collaborative AI community that creates tools for developers, with over 61,000 pre-trained models and 7,000 datasets.",https://explodingtopics.com/blog/generative-ai-startups
148
+ Tabnine,Tabnine is an AI assistant for software developers that uses generative AI to predict or suggest the next lines of code.,https://explodingtopics.com/blog/generative-ai-startups
149
+ Soundraw,Soundraw is a royalty-free AI music generator that allows creators to make original songs and retain ownership.,https://explodingtopics.com/blog/generative-ai-startups
150
+ Tome.app,Tome is an AI-powered storytelling platform that facilitates the creation of presentations using generative AI.,https://explodingtopics.com/blog/generative-ai-startups
151
+ AssemblyAI,AssemblyAI is an AI-as-a-service startup providing APIs for automated speech transcription and advanced content moderation.,https://explodingtopics.com/blog/generative-ai-startups
152
+ Promptbase,Promptbase is a marketplace for buying and selling prompts to generate predictive results using generative AI tools.,https://explodingtopics.com/blog/generative-ai-startups
153
+ PhotoRoom,PhotoRoom is an AI-powered photo editing tool that blends generative AI with traditional editing tools.,https://explodingtopics.com/blog/generative-ai-startups
154
+ Taskade,"Taskade is a generative AI productivity tool focused on task management, note-taking, and team collaboration.",https://explodingtopics.com/blog/generative-ai-startups
155
+ Synthesia,Synthesia AI is a generative AI video maker that creates videos from text inputs.,https://explodingtopics.com/blog/generative-ai-startups
156
+ Humata AI,Humata AI integrates with desktop to let users ask questions and get answers about specific documents.,https://explodingtopics.com/blog/generative-ai-startups
157
+ Chatbase,Chatbase is an integrated chatbot for websites that provides instant answers to customer inquiries.,https://explodingtopics.com/blog/generative-ai-startups
158
+ Stability AI,Stability AI is the creator of Stable Diffusion and develops open-source models for image generation.,https://explodingtopics.com/blog/generative-ai-startups
159
+ Anyword,Anyword is a generative AI content generation platform using natural language processing to write copy.,https://explodingtopics.com/blog/generative-ai-startups
160
+ Rephrase AI,Rephrase AI is a text-to-video generation platform allowing customers to create videos with customizable avatars.,https://explodingtopics.com/blog/generative-ai-startups
161
+ Inworld AI,Inworld AI implements AI-powered character generation for video games using natural language processing.,https://explodingtopics.com/blog/generative-ai-startups
162
+ Runway,Runway is a generative AI video editing platform that creates video clips based on text prompts.,https://explodingtopics.com/blog/generative-ai-startups
163
+ Sudowrite,Sudowrite is an AI writing assistant specifically designed for novel writing and storytelling.,https://explodingtopics.com/blog/generative-ai-startups
164
+ Steve.ai,Steve.ai is an online video creation platform that turns text prompts into animated videos.,https://explodingtopics.com/blog/generative-ai-startups
165
+ PlayHT,PlayHT is a text-to-speech software using generative AI to convert written text into human-like audio.,https://explodingtopics.com/blog/generative-ai-startups
166
+ Elicit,Elicit is a generative AI research tool for analyzing and summarizing academic papers.,https://explodingtopics.com/blog/generative-ai-startups
167
+ TalkPal,TalkPal is an AI-powered language learning platform offering personalized tutor sessions in 57 languages.,https://explodingtopics.com/blog/generative-ai-startups
168
+ Dubverse,Dubverse is an AI video dubbing platform that translates videos into multiple languages.,https://explodingtopics.com/blog/generative-ai-startups
169
+ Codeium,Codeium is an AI-powered toolkit for developers to assist with code creation and translation.,https://explodingtopics.com/blog/generative-ai-startups
170
+ Fliki,Fliki is an AI video and audio generation platform allowing for quick video creation from text prompts.,https://explodingtopics.com/blog/generative-ai-startups
171
+ LOVO AI,LOVO is an AI voice generator capable of creating realistic voice cloning and text-to-speech functionality.,https://explodingtopics.com/blog/generative-ai-startups
172
+ Decktopus,Decktopus helps users create presentations from prompts by generating personalized slide content.,https://explodingtopics.com/blog/generative-ai-startups
173
+ Character.ai,Character AI is a generative AI platform for creating animated 3D characters that interact in conversations.,https://explodingtopics.com/blog/generative-ai-startups
174
+ Descript,Descript is a generative AI video and audio editing application designed for podcasters and videographers.,https://explodingtopics.com/blog/generative-ai-startups
175
+ Papercup,Papercup uses machine learning to translate speech and create voiceovers for video content.,https://explodingtopics.com/blog/generative-ai-startups
176
+ Vizcom,Vizcom is a generative AI tool that assists designers by turning sketches into 3D concept drawings.,https://explodingtopics.com/blog/generative-ai-startups
177
+ Vidnoz,Vidnoz is a free AI video platform enabling users to create videos with various AI features.,https://explodingtopics.com/blog/generative-ai-startups
178
+ Scalenut,Scalenut is a generative AI-powered SEO and content marketing platform useful for content creation and optimization.,https://explodingtopics.com/blog/generative-ai-startups
179
+ Autonomous Agents,"Startups focused on autonomous agents, which have potential for genuine problem-solving using AI.",https://www.reddit.com/r/Startup_Ideas/comments/1djstai/thoughts_on_llm_based_startups/
180
+ Huma.AI,"A generative AI for life sciences SaaS platform, recognized by Gartner, following its collaboration with OpenAI to deploy a validated GenAI solution for medical affairs.",https://www.linkedin.com/pulse/20-gen-ai-healthcare-startups-shaping-future-recap-from-renee-yao-q7lkc
181
+ Viz.ai,"A medical imaging startup specializing in stroke care, using LLMs for early disease detection.",https://www.linkedin.com/pulse/20-gen-ai-healthcare-startups-shaping-future-recap-from-renee-yao-q7lkc
182
+ Arionkoder,"A product development studio and AI lab service company with expertise in AI, computer vision, and natural language processing.",https://www.linkedin.com/pulse/20-gen-ai-healthcare-startups-shaping-future-recap-from-renee-yao-q7lkc
183
+ HeHealth,Employs AI and LLM technologies to deliver efficient recommendations for male care.,https://www.linkedin.com/pulse/20-gen-ai-healthcare-startups-shaping-future-recap-from-renee-yao-q7lkc
184
+ HOPPR,A multimodal imaging platform that facilitates deep image analysis and improves medical processes.,https://www.linkedin.com/pulse/20-gen-ai-healthcare-startups-shaping-future-recap-from-renee-yao-q7lkc
185
+ Medical IP,A medical metaverse solution utilizing generative AI for streamlined medical imaging segmentation.,https://www.linkedin.com/pulse/20-gen-ai-healthcare-startups-shaping-future-recap-from-renee-yao-q7lkc
186
+ NexusMD,An LLM-powered medical imaging platform that automates medical imaging data capture.,https://www.linkedin.com/pulse/20-gen-ai-healthcare-startups-shaping-future-recap-from-renee-yao-q7lkc
187
+ Abridge,A generative AI for clinical documentation that converts patient-clinician conversations into structured clinical notes.,https://www.linkedin.com/pulse/20-gen-ai-healthcare-startups-shaping-future-recap-from-renee-yao-q7lkc
188
+ Autonomize AI,A healthcare-optimized AI platform utilizing several LLMs for various operational efficiencies.,https://www.linkedin.com/pulse/20-gen-ai-healthcare-startups-shaping-future-recap-from-renee-yao-q7lkc
189
+ DeepScribe,A med-tech firm leveraging LLMs to automate clinical documentation.,https://www.linkedin.com/pulse/20-gen-ai-healthcare-startups-shaping-future-recap-from-renee-yao-q7lkc
190
+ HiLabs,Works with major health plans to refine dirty data using advanced AI and LLMs.,https://www.linkedin.com/pulse/20-gen-ai-healthcare-startups-shaping-future-recap-from-renee-yao-q7lkc
191
+ Nabla,"Offers Copilot, an ambient AI solution for clinical note generation.",https://www.linkedin.com/pulse/20-gen-ai-healthcare-startups-shaping-future-recap-from-renee-yao-q7lkc
192
+ AgentifAI,A voice-first AI assistant for healthcare that enhances patient customer experience.,https://www.linkedin.com/pulse/20-gen-ai-healthcare-startups-shaping-future-recap-from-renee-yao-q7lkc
193
+ Artisight,Deployed in hospitals with an end-to-end sensor fusion platform solution leveraging an encoder LLM.,https://www.linkedin.com/pulse/20-gen-ai-healthcare-startups-shaping-future-recap-from-renee-yao-q7lkc
194
+ dacadoo,A digital health platform connecting to various devices and integrating an LLM-based streaming model.,https://www.linkedin.com/pulse/20-gen-ai-healthcare-startups-shaping-future-recap-from-renee-yao-q7lkc
195
+ Hippocratic AI,Developing the healthcare industry’s first safety-focused LLM for patient-facing applications.,https://www.linkedin.com/pulse/20-gen-ai-healthcare-startups-shaping-future-recap-from-renee-yao-q7lkc
196
+ Idoven,"Developed Willem-AI, an AI-powered cardiology platform for identifying and diagnosing patients.",https://www.linkedin.com/pulse/20-gen-ai-healthcare-startups-shaping-future-recap-from-renee-yao-q7lkc
197
+ Inference Analytics,A generative AI healthcare platform trained on 450M+ medical records with applications for healthcare parties.,https://www.linkedin.com/pulse/20-gen-ai-healthcare-startups-shaping-future-recap-from-renee-yao-q7lkc
198
+ Pingoo,An AI health chatbot that provides personalized health education and engagement for diabetes patients.,https://www.linkedin.com/pulse/20-gen-ai-healthcare-startups-shaping-future-recap-from-renee-yao-q7lkc
199
+ Talkie.ai,Automates patient phone interactions using AI voice and LLM technology.,https://www.linkedin.com/pulse/20-gen-ai-healthcare-startups-shaping-future-recap-from-renee-yao-q7lkc
200
+ LLMflation,LLM inference cost is going down fast.,https://a16z.com/ai/
201
+ How to Build a Thriving AI Ecosystem,Insights on building a successful AI ecosystem.,https://a16z.com/ai/
202
+ The Economic Case for Generative AI and Foundation Models,Exploring the financial implications and advantages of generative AI.,https://a16z.com/ai/
203
+ Emerging Architectures for LLM Applications,Discussing new architectural models for LLM applications.,https://a16z.com/ai/
204
+ How Generative AI Is Remaking UI/UX Design,Impact of generative AI on user interface and user experience design.,https://a16z.com/ai/
205
+ The Top 100 Gen AI Consumer Apps,Analyzing the most popular generative AI consumer applications.,https://a16z.com/ai/
206
+ OpenAI,"Developer of ChatGPT and GPT-4, providing LLM APIs with functionalities like plugins, function calling, and integration with Whisper models.",https://praful-krishna.medium.com/thinking-of-an-llm-based-project-or-startup-dont-dd92c1a54237
207
+ Coseer,A startup that faced challenges in convincing the market to adopt its LLM-based solutions.,https://praful-krishna.medium.com/thinking-of-an-llm-based-project-or-startup-dont-dd92c1a54237
208
+ Anthropic,An LLM provider known for having reasonable and transparent security policies.,https://praful-krishna.medium.com/thinking-of-an-llm-based-project-or-startup-dont-dd92c1a54237
209
+ beautiful.ai,A startup creating innovative tools for presentations.,https://medium.com/point-nine-news/where-are-the-opportunities-for-new-startups-in-generative-ai-f48068b5f8f9
210
+ Tome,A startup providing a platform for presentations.,https://medium.com/point-nine-news/where-are-the-opportunities-for-new-startups-in-generative-ai-f48068b5f8f9
211
+ Rows,A startup offering tools for spreadsheets.,https://medium.com/point-nine-news/where-are-the-opportunities-for-new-startups-in-generative-ai-f48068b5f8f9
212
+ mem,A startup focused on note-taking solutions.,https://medium.com/point-nine-news/where-are-the-opportunities-for-new-startups-in-generative-ai-f48068b5f8f9
213
+ Clio,A practice management solution for law firms that has access to extensive data.,https://medium.com/point-nine-news/where-are-the-opportunities-for-new-startups-in-generative-ai-f48068b5f8f9
214
+ Bench,An accounting service that exemplifies the auto-pilot business model.,https://medium.com/point-nine-news/where-are-the-opportunities-for-new-startups-in-generative-ai-f48068b5f8f9
215
+ Pilot,A recent accounting service exploring the auto-pilot approach.,https://medium.com/point-nine-news/where-are-the-opportunities-for-new-startups-in-generative-ai-f48068b5f8f9
216
+ OpenAI,"OpenAI is the highest profile company in the generative AI space, known for its prebuilt AI solutions and API and application development support for developers.",https://www.eweek.com/artificial-intelligence/generative-ai-startups/
217
+ Anthropic,"Anthropic’s Claude platform focuses on content generation, providing a customizable chatbot experience.",https://www.eweek.com/artificial-intelligence/generative-ai-startups/
218
+ Cohere,Cohere offers NLP solutions designed to support business operations through its conversational AI agent.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
219
+ Glean,Glean is an enterprise search company that uses deep-learning models to understand and answer natural language queries.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
220
+ Jasper,"Jasper's core product is designed for marketing content generation, helping users create social media, advertising, and blog content.",https://www.eweek.com/artificial-intelligence/generative-ai-startups/
221
+ Hugging Face,"Hugging Face is a community forum for AI and ML model development, known for its open-source LLM that generates content in multiple languages.",https://www.eweek.com/artificial-intelligence/generative-ai-startups/
222
+ Inflection AI,"Inflection AI focuses on personal AI tools, including Pi, which emphasizes colloquial conversation.",https://www.eweek.com/artificial-intelligence/generative-ai-startups/
223
+ Stability AI,"Stability AI is known for its popular app Stable Diffusion, a tool for image and video content generation.",https://www.eweek.com/artificial-intelligence/generative-ai-startups/
224
+ MOSTLY AI,"MOSTLY AI’s platform balances data democratization with data security, specializing in synthetic data generation.",https://www.eweek.com/artificial-intelligence/generative-ai-startups/
225
+ Lightricks,"Lightricks creates AI-powered apps for media editing, including notable products like Facetune.",https://www.eweek.com/artificial-intelligence/generative-ai-startups/
226
+ AI21 Labs,AI21 Labs creates tools for contextual natural language processing and offers third-party developers access to its language models.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
227
+ Tabnine,"Tabnine offers generative AI code assistance for software development, focusing on code completion and automation.",https://www.eweek.com/artificial-intelligence/generative-ai-startups/
228
+ Mistral AI,Mistral AI provides access to open generative AI models and developer-friendly resources.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
229
+ Codeium,Codeium provides resources for generating logical code and autocompletion for users.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
230
+ Clarifai,"Clarifai's platform supports AI-driven data labeling and preparation, alongside model building capabilities.",https://www.eweek.com/artificial-intelligence/generative-ai-startups/
231
+ Gong,Gong offers revenue intelligence solutions using AI to support customer service and sales effectiveness.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
232
+ Twain,Twain is an AI writing assistant aimed at helping sales professionals generate effective outreach content.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
233
+ Bertha.ai,Bertha.ai is a content generation application specifically designed for WordPress and similar platforms.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
234
+ Tome,"Tome creates a versatile platform for AI-based presentations, helping users generate insightful content.",https://www.eweek.com/artificial-intelligence/generative-ai-startups/
235
+ CopyAI,CopyAI focuses on enabling go-to-market workflows through generative content creation and task automation.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
236
+ Narrative BI,Narrative BI turns business intelligence data into understandable narratives for decision-making.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
237
+ Anyword,Anyword is a writing solution that optimizes content performance for marketing.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
238
+ Synthesia,"Synthesia specializes in AI video production, allowing users to create videos from text inputs.",https://www.eweek.com/artificial-intelligence/generative-ai-startups/
239
+ Midjourney,Midjourney is known for generating high-quality images based on natural language prompts.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
240
+ MURF.AI,MURF.AI is a voice AI generation company with multilingual capabilities.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
241
+ PlayHT,PlayHT specializes in AI-generated voice content and podcast production.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
242
+ ElevenLabs,"ElevenLabs produces high-quality voice generation technology, offering features for text to speech.",https://www.eweek.com/artificial-intelligence/generative-ai-startups/
243
+ Colossyan,Colossyan is focused on creating high-quality corporate training videos using AI.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
244
+ AssemblyAI,AssemblyAI provides speech-to-text models tailored for enterprise usage.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
245
+ Plask,"Plask offers tools for automated animation, making motion design easier.",https://www.eweek.com/artificial-intelligence/generative-ai-startups/
246
+ LOVO,LOVO is a comprehensive AI platform for video and voice generation.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
247
+ DeepBrain AI,DeepBrain AI focuses on video generation and interactive human avatars.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
248
+ Elai.io,Elai.io provides AI video generation tools designed for the business sector.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
249
+ Sudowrite,"Sudowrite is a writing support tool for authors, enhancing creativity and storytelling.",https://www.eweek.com/artificial-intelligence/generative-ai-startups/
250
+ Tavus,Tavus personalizes video content for different viewer requirements through generative technology.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
251
+ Hippocratic AI,"Hippocratic AI develops AI solutions for healthcare, ensuring compliance with privacy standards.",https://www.eweek.com/artificial-intelligence/generative-ai-startups/
252
+ Paige AI,Paige AI optimizes cancer diagnostics using advanced machine learning techniques.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
253
+ Iambic Therapeutics,Iambic focuses on drug discovery and development using advanced AI methodologies.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
254
+ Insilico Medicine,Insilico utilizes generative AI for drug development and research in various medical fields.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
255
+ Etcembly,Etcembly focuses on improving immunotherapies using machine learning.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
256
+ Biomatter,Biomatter uses its Intelligent Architecture platform for protein design and manufacturing.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
257
+ Activ Surgical,Activ Surgical enhances surgical intelligence with real-time data visualization.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
258
+ Kaliber Labs,Kaliber develops AI-powered surgical software solutions for improved medical procedures.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
259
+ Osmo,"Osmo applies machine learning to olfactory science, aiming to predict scents.",https://www.eweek.com/artificial-intelligence/generative-ai-startups/
260
+ Aqemia,Aqemia leverages AI for faster drug discovery and development.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
261
+ Synthetaic,Synthetaic generates AI models for analyzing unstructured datasets.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
262
+ Synthesis AI,Synthesis AI specializes in synthetic data generation targeted for various industries.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
263
+ Syntho,Syntho provides synthesized data generation and analytics solutions.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
264
+ GenRocket,GenRocket emphasizes dynamic and automated test data generation.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
265
+ Gridspace,Gridspace offers AI solutions to optimize customer interaction in contact centers.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
266
+ Revery AI,Revery AI focuses on creating virtual try-on experiences in fashion.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
267
+ Veesual,Veesual enables virtual try-ons through deep learning and image generation.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
268
+ Frame AI,Frame AI uses AI to provide audience analytics and insights.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
269
+ Zowie,Zowie produces AI-driven customer service solutions for e-commerce.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
270
+ Forethought,Forethought develops generative AI technology for improved customer service.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
271
+ Lily AI,Lily AI uses AI for product management and enhancing customer experiences.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
272
+ Runway,Runway produces AI-powered video content creation tools.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
273
+ Latitude.io,Latitude.io is known for creating AI-driven gaming experiences.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
274
+ Character.AI,Character.AI allows users to interact with conversational characters.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
275
+ Charisma Entertainment,Charisma offers tools for developing interactive storytelling in various mediums.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
276
+ Replika,Replika creates AI companions for personal conversations and interactions.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
277
+ Aimi.fm,Aimi.fm generates music content for various media and users.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
278
+ Inworld AI,Inworld AI develops realistic NPC characters for gaming and training.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
279
+ SOUNDRAW,SOUNDRAW offers music composition tools for content generation.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
280
+ Notion,Notion provides a collaborative workspace solution with AI-enhanced features.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
281
+ Harvey,Harvey offers legal AI solutions for document handling and services.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
282
+ Ironclad,Ironclad focuses on AI contract management across various sectors.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
283
+ Taskade,Taskade uses AI to aid in task and project management.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
284
+ Humata,Humata offers AI-powered tools to extract insights from dense documents.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
285
+ Simplifai,Simplifai provides automation tools for highly regulated industries.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
286
+ PatentPal,PatentPal streamlines patent application processes with AI-generated content.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
287
+ Adept AI,Adept AI automates workplace interactions with generative AI tools.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
288
+ Perplexity AI,Perplexity AI is an AI search engine focused on providing personalized results.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
289
+ Andi,Andi is a generative AI search bot designed for user-friendly information retrieval.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
290
+ You.com,You.com is a secure search engine that personalizes results with generative AI.,https://www.eweek.com/artificial-intelligence/generative-ai-startups/
291
+ ```
demos/search_on_site_and_date.md ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ The following query will only use the information from openai.com that was updated in the previous
2
+ day. The behavior is similar to the "site:openai.com" and "date-restrict" search parameters in Google
3
+ search.
4
+
5
+ ```bash
6
+ python ask.py -c -q "OpenAI Swarm Framework" -d 1 -s openai.com
7
+ 2024-11-20 10:05:45,949 - INFO - Initializing converter ...
8
+ 2024-11-20 10:05:45,949 - INFO - ✅ Successfully initialized Docling.
9
+ 2024-11-20 10:05:45,949 - INFO - Initializing chunker ...
10
+ 2024-11-20 10:05:46,185 - INFO - ✅ Successfully initialized Chonkie.
11
+ 2024-11-20 10:05:46,499 - INFO - Initializing database ...
12
+ 2024-11-20 10:05:46,591 - INFO - ✅ Successfully initialized DuckDB.
13
+ 2024-11-20 10:05:46,591 - INFO - Searching the web ...
14
+ 2024-11-20 10:05:47,055 - INFO - ✅ Found 10 links for query: OpenAI Swarm Framework
15
+ 2024-11-20 10:05:47,055 - INFO - Scraping the URLs ...
16
+ 2024-11-20 10:05:47,055 - INFO - Scraping https://community.openai.com/t/agent-swarm-what-actually-is-the-point/578347 ...
17
+ 2024-11-20 10:05:47,056 - INFO - Scraping https://community.openai.com/t/introducing-swarm-js-node-js-implementation-of-openai-swarm/977510 ...
18
+ 2024-11-20 10:05:47,056 - INFO - Scraping https://community.openai.com/t/openai-swarm-for-agents-and-agent-handoffs/976579 ...
19
+ 2024-11-20 10:05:47,057 - INFO - Scraping https://cookbook.openai.com/examples/orchestrating_agents ...
20
+ 2024-11-20 10:05:47,058 - INFO - Scraping https://community.openai.com/t/swarm-some-initial-insights/976602 ...
21
+ 2024-11-20 10:05:47,059 - INFO - Scraping https://community.openai.com/t/how-to-use-async-functions-with-swarm/994569 ...
22
+ 2024-11-20 10:05:47,060 - INFO - Scraping https://community.openai.com/t/messages-i-o-growing-now-what/990194 ...
23
+ 2024-11-20 10:05:47,061 - INFO - Scraping https://forum.openai.com/public/events/virtual-event-technical-success-office-hours-gwpi7fv9mz ...
24
+ 2024-11-20 10:05:47,062 - INFO - Scraping https://community.openai.com/t/new-reasoning-models-openai-o1-preview-and-o1-mini/938081?page=3 ...
25
+ 2024-11-20 10:05:47,063 - INFO - Scraping https://forum.openai.com/public/videos/technical-success-office-hours-swam-11-14-2024 ...
26
+ 2024-11-20 10:05:47,358 - INFO - ✅ Successfully scraped https://community.openai.com/t/how-to-use-async-functions-with-swarm/994569 with length: 781
27
+ 2024-11-20 10:05:47,540 - INFO - ✅ Successfully scraped https://community.openai.com/t/introducing-swarm-js-node-js-implementation-of-openai-swarm/977510 with length: 3081
28
+ 2024-11-20 10:05:47,625 - INFO - ✅ Successfully scraped https://community.openai.com/t/swarm-some-initial-insights/976602 with length: 5786
29
+ 2024-11-20 10:05:47,662 - INFO - ✅ Successfully scraped https://community.openai.com/t/messages-i-o-growing-now-what/990194 with length: 12642
30
+ 2024-11-20 10:05:47,664 - INFO - ✅ Successfully scraped https://community.openai.com/t/openai-swarm-for-agents-and-agent-handoffs/976579 with length: 6016
31
+ 2024-11-20 10:05:47,666 - INFO - ✅ Successfully scraped https://community.openai.com/t/agent-swarm-what-actually-is-the-point/578347 with length: 11872
32
+ 2024-11-20 10:05:47,670 - INFO - ✅ Successfully scraped https://community.openai.com/t/new-reasoning-models-openai-o1-preview-and-o1-mini/938081?page=3 with length: 13588
33
+ 2024-11-20 10:05:47,778 - INFO - ✅ Successfully scraped https://forum.openai.com/public/events/virtual-event-technical-success-office-hours-gwpi7fv9mz with length: 3655
34
+ 2024-11-20 10:05:48,018 - INFO - ✅ Successfully scraped https://forum.openai.com/public/videos/technical-success-office-hours-swam-11-14-2024 with length: 47441
35
+ 2024-11-20 10:05:48,334 - INFO - ✅ Successfully scraped https://cookbook.openai.com/examples/orchestrating_agents with length: 18586
36
+ 2024-11-20 10:05:48,334 - INFO - ✅ Scraped 10 URLs.
37
+ 2024-11-20 10:05:48,335 - INFO - Chunking the text ...
38
+ 2024-11-20 10:05:48,356 - INFO - ✅ Generated 37 chunks ...
39
+ 2024-11-20 10:05:48,356 - INFO - Saving 37 chunks to DB ...
40
+ 2024-11-20 10:05:48,376 - INFO - Embedding 10 batches of chunks ...
41
+ 2024-11-20 10:05:49,796 - INFO - ✅ Finished embedding.
42
+ 2024-11-20 10:05:50,338 - INFO - ✅ Created the vector index ...
43
+ 2024-11-20 10:05:50,409 - INFO - ✅ Created the full text search index ...
44
+ 2024-11-20 10:05:50,410 - INFO - ✅ Successfully embedded and saved chunks to DB.
45
+ 2024-11-20 10:05:50,410 - INFO - Querying the vector DB to get context ...
46
+ 2024-11-20 10:05:50,621 - INFO - Running full-text search ...
47
+ 2024-11-20 10:05:50,644 - INFO - ✅ Got 13 matched chunks.
48
+ 2024-11-20 10:05:50,644 - INFO - Running inference with context ...
49
+ 2024-11-20 10:05:56,986 - INFO - ✅ Finished inference API call.
50
+ 2024-11-20 10:05:56,986 - INFO - Generating output ...
51
+ # Answer
52
+
53
+ OpenAI Swarm is an experimental framework designed to create, manage, and deploy multi-agent systems. It allows multiple AI agents to collaborate on complex tasks, differing significantly from traditional single-agent models and other OpenAI tools like Custom GPTs, API Completions, Functions, and Assistants.
54
+
55
+ Key differentiators of Swarm include:
56
+
57
+ 1. **Multi-Agent Collaboration**: Swarm enables agents to interact and coordinate, enhancing efficiency in problem-solving. Traditional models typically operate with single-agent interactions[1].
58
+
59
+ 2. **Orchestration and Coordination**: The framework provides mechanisms for task delegation, synchronization, and result aggregation essential for handling the complexity of multi-agent scenarios. Existing APIs primarily function within a single agent’s context without such coordination[1].
60
+
61
+ 3. **Scalability and Flexibility**: Swarm is designed to easily scale by adding specialized agents, offering customization for roles within the system. In contrast, existing APIs usually focus on increasing the capacity of a single model rather than expanding agent collaboration[1].
62
+
63
+ 4. **Ideal Use Cases**: Swarm is particularly useful for tasks that benefit from parallel processing and specialization, like complex simulations and large-scale data analysis. Other models are more suited to tasks manageable by single agents, such as content generation[1].
64
+
65
+ 5. **Back-End Integration**: Swarm is primarily tailored for back-end development, allowing integration into applications via programming languages like Python using APIs[1]. In contrast, other tools allow for more direct user interactions through front-end interfaces like ChatGPT[1].
66
+
67
+ It should be noted that Swarm is an educational resource for exploring multi-agent orchestration and not intended for production-ready applications, highlighting the significance of programming expertise for its implementation[3][5][11].
68
+
69
+
70
+ # References
71
+
72
+ [1] https://community.openai.com/t/swarm-some-initial-insights/976602
73
+ [2] https://community.openai.com/t/introducing-swarm-js-node-js-implementation-of-openai-swarm/977510
74
+ [3] https://community.openai.com/t/openai-swarm-for-agents-and-agent-handoffs/976579
75
+ [4] https://community.openai.com/t/swarm-some-initial-insights/976602
76
+ [5] https://community.openai.com/t/openai-swarm-for-agents-and-agent-handoffs/976579
77
+ [6] https://community.openai.com/t/how-to-use-async-functions-with-swarm/994569
78
+ [7] https://forum.openai.com/public/videos/technical-success-office-hours-swam-11-14-2024
79
+ [8] https://community.openai.com/t/agent-swarm-what-actually-is-the-point/578347
80
+ [9] https://forum.openai.com/public/videos/technical-success-office-hours-swam-11-14-2024
81
+ [10] https://community.openai.com/t/agent-swarm-what-actually-is-the-point/578347
82
+ [11] https://forum.openai.com/public/events/virtual-event-technical-success-office-hours-gwpi7fv9mz
83
+ [12] https://forum.openai.com/public/videos/technical-success-office-hours-swam-11-14-2024
84
+ [13] https://forum.openai.com/public/videos/technical-success-office-hours-swam-11-14-2024
85
+ ```
env.deepseek.tpl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ LLM_BASE_URL=https://api.deepseek.com/v1
2
+ LLM_API_KEY=<deepseek-api-key>
3
+ DEFAULT_INFERENCE_MODEL=deepseek-chat
4
+
5
+ EMBED_BASE_URL=https://api.openai.com/v1
6
+ EMBED_API_KEY=<openai-api-key>
7
+ EMBEDDING_MODEL=text-embedding-3-small
8
+ EMBEDDING_DIMENSIONS=1536
env.ollama.tpl ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ LLM_BASE_URL=http://localhost:11434/v1
2
+ LLM_API_KEY=dummy-api-key
3
+
4
+ DEFAULT_INFERENCE_MODEL=llama3.2
5
+ EMBEDDING_MODEL=nomic-embed-text
6
+ EMBEDDING_DIMENSIONS=768
env.tpl ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # right now we use Google search API as the default search engine
2
+ SEARCH_API_URL=https://www.googleapis.com/customsearch/v1
3
+ SEARCH_API_KEY=<your-google-search-api-key>
4
+ SEARCH_PROJECT_KEY=<your-google-cx-key>
5
+
6
+ # right now we use OpenAI API as the default LLM inference engine and embedding model
7
+ LLM_BASE_URL=https://api.openai.com/v1
8
+ LLM_API_KEY=<your-openai-api-key>
9
+ DEFAULT_INFERENCE_MODEL=gpt-4o-mini
10
+ EMBEDDING_MODEL=text-embedding-3-small
11
+ EMBEDDING_DIMENSIONS=1536
12
+
13
+ # Run and share Gradio UI
14
+ RUN_GRADIO_UI=False
15
+ SHARE_GRADIO_UI=False
16
+
instructions/extract_example.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ class CompanyInfo(BaseModel):
2
+ name: str
3
+ description: str
instructions/links.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # you can specify a --url-list-file argument with links similar to the ones below
2
+ # ask.py will crawl these pages and answer the question based on their contents
3
+ https://en.wikipedia.org/wiki/Large_language_model
4
+ https://en.wikipedia.org/wiki/Retrieval-augmented_generation
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ click==8.1.7
2
+ requests==2.32.3
3
+ numpy==1.26.4
4
+ jinja2==3.1.3
5
+ bs4==0.0.2
6
+ python-dotenv==1.0.1
7
+ openai==1.57.2
8
+ duckdb==1.1.2
9
+ gradio==5.3.0
10
+ chonkie==0.1.2
11
+ docling==2.5.2
scripts/draw_flow.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Re-importing necessary libraries and recreating the flowchart due to a reset.
2
+ import matplotlib.patches as patches
3
+ import matplotlib.pyplot as plt
4
+
5
+ # Create a figure
6
+ fig, ax = plt.subplots(figsize=(10, 14))
7
+
8
+
9
+ # Helper function to create a box
10
+ def create_box(text, x, y, width=2.5, height=0.8, color="lightblue"):
11
+ ax.add_patch(
12
+ patches.Rectangle(
13
+ (x, y), width, height, edgecolor="black", facecolor=color, lw=1.5
14
+ )
15
+ )
16
+ ax.text(x + width / 2, y + height / 2, text, ha="center", va="center", fontsize=10)
17
+
18
+
19
+ # Helper function to create an arrow
20
+ def create_arrow(x_start, y_start, x_end, y_end):
21
+ ax.annotate(
22
+ "",
23
+ xy=(x_end, y_end),
24
+ xytext=(x_start, y_start),
25
+ arrowprops=dict(facecolor="black", shrink=0.05, width=1.5, headwidth=8),
26
+ )
27
+
28
+
29
+ # Draw the flowchart components
30
+ create_box("Start", 4, 12)
31
+ create_box("Query Input", 4, 10.5)
32
+ create_box("Mode Selection", 4, 9)
33
+ create_box("Search Mode", 1.5, 7.5)
34
+ create_box("Local Mode", 6.5, 7.5)
35
+ create_box("Search Google", 1.5, 6)
36
+ create_box("Crawl and Scrape Results", 1.5, 4.5)
37
+ create_box("Use Local Files", 6.5, 6)
38
+ create_box("Extract Text Content", 6.5, 4.5)
39
+ create_box("Chunk Text Content", 4, 3)
40
+ create_box("Save to VectorDB", 4, 1.5)
41
+ create_box("Perform Hybrid Search", 4, 0)
42
+ create_box("[Optional] Re-rank Results", 4, -1.5)
43
+ create_box("Use Top Chunks as Context", 4, -3)
44
+ create_box("Generate Answer with References", 4, -4.5)
45
+ create_box("Output Answer", 4, -6)
46
+
47
+ # Draw the arrows
48
+ create_arrow(5.25, 12, 5.25, 11.3)
49
+ create_arrow(5.25, 10.5, 5.25, 9.8)
50
+ create_arrow(5.25, 9, 3.5, 8.3) # to Search Mode
51
+ create_arrow(5.25, 9, 6.5, 8.3) # to Local Mode
52
+ create_arrow(2.75, 7.5, 2.75, 6.8) # to Search Google
53
+ create_arrow(7.75, 7.5, 7.75, 6.8) # to Use Local Files
54
+ create_arrow(2.75, 6, 2.75, 5.3) # to Crawl and Scrape Results
55
+ create_arrow(7.75, 6, 7.75, 5.3) # to Extract Text Content
56
+ create_arrow(2.75, 4.5, 4, 3.8) # to Chunk Text Content
57
+ create_arrow(7.75, 4.5, 6, 3.8) # to Chunk Text Content
58
+ create_arrow(5.25, 3, 5.25, 2.3) # to Save to VectorDB
59
+ create_arrow(5.25, 1.5, 5.25, 0.8) # to Perform Hybrid Search
60
+ create_arrow(5.25, 0, 5.25, -0.8) # to Optional Re-rank Results
61
+ create_arrow(5.25, -1.5, 5.25, -2.3) # to Use Top Chunks as Context
62
+ create_arrow(5.25, -3, 5.25, -3.8) # to Generate Answer with References
63
+ create_arrow(5.25, -4.5, 5.25, -5.3) # to Output Answer
64
+
65
+ # Final touches
66
+ ax.axis("off")
67
+ plt.title("Flowchart of Query Processing System", fontsize=14)
68
+ plt.show()
svc.leettools.com ADDED
File without changes