Upload folder using huggingface_hub
Browse files
- README.md +4 -5
- ask.py +7 -23
- requirements.txt +5 -5
README.md
CHANGED
|
@@ -88,8 +88,7 @@ Given a query, the program will
|
|
| 88 |
- use the top chunks as the context to ask an LLM to generate the answer
|
| 89 |
- output the answer with the references
|
| 90 |
|
| 91 |
-
Of course this flow is a very simplified version of the real AI search engines, but it is a good
|
| 92 |
-
starting point to understand the basic concepts.
|
| 93 |
|
| 94 |
One benefit is that we can manipulate the search function and output format.
|
| 95 |
|
|
@@ -112,9 +111,9 @@ the pipeline.
|
|
| 112 |
|
| 113 |
# modify .env file to set the API keys or export them as environment variables as below
|
| 114 |
|
| 115 |
-
# you
|
| 116 |
-
|
| 117 |
-
|
| 118 |
|
| 119 |
# right now we use OpenAI API, default using OpenAI
|
| 120 |
# % export LLM_BASE_URL=https://api.openai.com/v1
|
|
|
|
| 88 |
- use the top chunks as the context to ask an LLM to generate the answer
|
| 89 |
- output the answer with the references
|
| 90 |
|
| 91 |
+
Of course this flow is a very simplified version of the real AI search engines, but it is a good starting point to understand the basic concepts.
|
|
|
|
| 92 |
|
| 93 |
One benefit is that we can manipulate the search function and output format.
|
| 94 |
|
|
|
|
| 111 |
|
| 112 |
# modify .env file to set the API keys or export them as environment variables as below
|
| 113 |
|
| 114 |
+
# you need to set the Google search API key and the search engine (cx) key
|
| 115 |
+
% export SEARCH_API_KEY="your-google-search-api-key"
|
| 116 |
+
% export SEARCH_PROJECT_KEY="your-google-cx-key"
|
| 117 |
|
| 118 |
# right now we use OpenAI API, default using OpenAI
|
| 119 |
# % export LLM_BASE_URL=https://api.openai.com/v1
|
ask.py
CHANGED
|
@@ -135,29 +135,13 @@ class Ask:
|
|
| 135 |
|
| 136 |
self.search_api_url = os.environ.get("SEARCH_API_URL")
|
| 137 |
if self.search_api_url is None:
|
| 138 |
-
self.
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
self.logger.info("No SEARCH_API_URL or SEARCH_API_KEYenv variable set.")
|
| 146 |
-
self.logger.info(
|
| 147 |
-
"Using the default proxy at https://svc.leettools.com:8098"
|
| 148 |
-
)
|
| 149 |
-
self.search_api_url = "https://svc.leettools.com:8098/customsearch/v1"
|
| 150 |
-
self.search_api_key = "dummy-search-api-key"
|
| 151 |
-
self.search_project_id = "dummy-search-project-id"
|
| 152 |
-
else:
|
| 153 |
-
self.search_api_key = os.environ.get("SEARCH_API_KEY")
|
| 154 |
-
if self.search_api_key is None:
|
| 155 |
-
err_msg += (
|
| 156 |
-
f"SEARCH_API_KEY env variable not set for {self.search_api_url}.\n"
|
| 157 |
-
)
|
| 158 |
-
self.search_project_id = os.environ.get("SEARCH_PROJECT_KEY")
|
| 159 |
-
if self.search_project_id is None:
|
| 160 |
-
err_msg += f"SEARCH_PROJECT_KEY env variable not set for {self.search_api_url}.\n"
|
| 161 |
|
| 162 |
self.llm_base_url = os.environ.get("LLM_BASE_URL")
|
| 163 |
if self.llm_base_url is None:
|
|
|
|
| 135 |
|
| 136 |
self.search_api_url = os.environ.get("SEARCH_API_URL")
|
| 137 |
if self.search_api_url is None:
|
| 138 |
+
self.search_api_url = "https://www.googleapis.com/customsearch/v1"
|
| 139 |
+
self.search_api_key = os.environ.get("SEARCH_API_KEY")
|
| 140 |
+
self.search_project_id = os.environ.get("SEARCH_PROJECT_KEY")
|
| 141 |
+
if self.search_api_key is None:
|
| 142 |
+
err_msg += "SEARCH_API_KEY env variable is not set.\n"
|
| 143 |
+
if self.search_project_id is None:
|
| 144 |
+
err_msg += "SEARCH_PROJECT_KEY env variable is not set.\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
|
| 146 |
self.llm_base_url = os.environ.get("LLM_BASE_URL")
|
| 147 |
if self.llm_base_url is None:
|
requirements.txt
CHANGED
|
@@ -4,8 +4,8 @@ numpy==1.26.4
|
|
| 4 |
jinja2==3.1.3
|
| 5 |
bs4==0.0.2
|
| 6 |
python-dotenv==1.0.1
|
| 7 |
-
openai==1.
|
| 8 |
-
duckdb==1.
|
| 9 |
-
gradio==5.
|
| 10 |
-
chonkie==0.
|
| 11 |
-
docling==2.
|
|
|
|
| 4 |
jinja2==3.1.3
|
| 5 |
bs4==0.0.2
|
| 6 |
python-dotenv==1.0.1
|
| 7 |
+
openai==1.87.0
|
| 8 |
+
duckdb==1.3.0
|
| 9 |
+
gradio==5.34.0
|
| 10 |
+
chonkie==v1.0.10
|
| 11 |
+
docling==2.36.0
|