Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files
.env.tpl
CHANGED
|
@@ -3,4 +3,8 @@ SEARCH_API_KEY=your-google-search-api-key
|
|
| 3 |
SEARCH_PROJECT_KEY=your-google-cx-key
|
| 4 |
|
| 5 |
# right now we use OpenAI API
|
| 6 |
-
LLM_API_KEY=your-openai-api-key
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
SEARCH_PROJECT_KEY=your-google-cx-key
|
| 4 |
|
| 5 |
# right now we use OpenAI API
|
| 6 |
+
LLM_API_KEY=your-openai-api-key
|
| 7 |
+
|
| 8 |
+
# Run and share Gradio UI
|
| 9 |
+
RUN_GRADIO_UI=False
|
| 10 |
+
SHARE_GRADIO_UI=False
|
README.md
CHANGED
|
@@ -11,6 +11,14 @@ sdk_version: 5.3.0
|
|
| 11 |
A single Python program to implement the search-extract-summarize flow, similar to AI search
|
| 12 |
engines such as Perplexity.
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
> [!NOTE]
|
| 15 |
> Our main goal is to illustrate the basic concepts of AI search engines with the raw constructs.
|
| 16 |
> Performance or scalability is not in the scope of this program.
|
|
@@ -64,17 +72,17 @@ Usage: ask.py [OPTIONS]
|
|
| 64 |
Search web for the query and summarize the results
|
| 65 |
|
| 66 |
Options:
|
| 67 |
-
-
|
| 68 |
-
|
| 69 |
-
target URL list and answer the query based
|
| 70 |
-
on the content [default:
|
| 71 |
-
instructions/links.txt]
|
| 72 |
-d, --date-restrict INTEGER Restrict search results to a specific date
|
| 73 |
range, default is no restriction
|
| 74 |
-s, --target-site TEXT Restrict search results to a specific site,
|
| 75 |
default is no restriction
|
| 76 |
--output-language TEXT Output language for the answer
|
| 77 |
--output-length INTEGER Output length for the answer
|
|
|
|
|
|
|
|
|
|
| 78 |
-m, --model-name TEXT Model name to use for inference
|
| 79 |
-l, --log-level [DEBUG|INFO|WARNING|ERROR]
|
| 80 |
Set the logging level [default: INFO]
|
|
@@ -87,7 +95,12 @@ Options:
|
|
| 87 |
- [OpenAI API](https://beta.openai.com/docs/api-reference/completions/create)
|
| 88 |
- [Jinja2](https://jinja.palletsprojects.com/en/3.0.x/)
|
| 89 |
- [bs4](https://www.crummy.com/software/BeautifulSoup/bs4/doc/)
|
| 90 |
-
- [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
|
| 92 |
## Sample output
|
| 93 |
|
|
|
|
| 11 |
A single Python program to implement the search-extract-summarize flow, similar to AI search
|
| 12 |
engines such as Perplexity.
|
| 13 |
|
| 14 |
+
> [UPDATE]
|
| 15 |
+
>
|
| 16 |
+
> - 2024-10-22: add GradIO integration
|
| 17 |
+
> - 2024-10-21: use DuckDB for the vector search and use API for embedding
|
| 18 |
+
> - 2024-10-20: allow to specify a list of input urls
|
| 19 |
+
> - 2024-10-18: output-language and output-length parameters for LLM
|
| 20 |
+
> - 2024-10-18: date-restrict and target-site parameters for search
|
| 21 |
+
|
| 22 |
> [!NOTE]
|
| 23 |
> Our main goal is to illustrate the basic concepts of AI search engines with the raw constructs.
|
| 24 |
> Performance or scalability is not in the scope of this program.
|
|
|
|
| 72 |
Search web for the query and summarize the results
|
| 73 |
|
| 74 |
Options:
|
| 75 |
+
--web-ui Launch the web interface
|
| 76 |
+
-q, --query TEXT Query to search
|
|
|
|
|
|
|
|
|
|
| 77 |
-d, --date-restrict INTEGER Restrict search results to a specific date
|
| 78 |
range, default is no restriction
|
| 79 |
-s, --target-site TEXT Restrict search results to a specific site,
|
| 80 |
default is no restriction
|
| 81 |
--output-language TEXT Output language for the answer
|
| 82 |
--output-length INTEGER Output length for the answer
|
| 83 |
+
--url-list-file TEXT Instead of doing web search, scrape the
|
| 84 |
+
target URL list and answer the query based
|
| 85 |
+
on the content
|
| 86 |
-m, --model-name TEXT Model name to use for inference
|
| 87 |
-l, --log-level [DEBUG|INFO|WARNING|ERROR]
|
| 88 |
Set the logging level [default: INFO]
|
|
|
|
| 95 |
- [OpenAI API](https://beta.openai.com/docs/api-reference/completions/create)
|
| 96 |
- [Jinja2](https://jinja.palletsprojects.com/en/3.0.x/)
|
| 97 |
- [bs4](https://www.crummy.com/software/BeautifulSoup/bs4/doc/)
|
| 98 |
+
- [DuckDB](https://github.com/duckdb/duckdb)
|
| 99 |
+
- [GradIO](https://www.gradio.app)
|
| 100 |
+
|
| 101 |
+
## Screenshot for the GradIO integration
|
| 102 |
+
|
| 103 |
+

|
| 104 |
|
| 105 |
## Sample output
|
| 106 |
|
ask.py
CHANGED
|
@@ -410,8 +410,6 @@ def _run_query(
|
|
| 410 |
) -> str:
|
| 411 |
logger = get_logger(log_level)
|
| 412 |
|
| 413 |
-
load_dotenv(dotenv_path=default_env_file, override=False)
|
| 414 |
-
|
| 415 |
ask = Ask(logger=logger)
|
| 416 |
|
| 417 |
if url_list_str is None or url_list_str.strip() == "":
|
|
@@ -474,6 +472,7 @@ def launch_gradio(
|
|
| 474 |
url_list_str: str,
|
| 475 |
model_name: str,
|
| 476 |
log_level: str,
|
|
|
|
| 477 |
) -> None:
|
| 478 |
iface = gr.Interface(
|
| 479 |
fn=_run_query,
|
|
@@ -513,7 +512,7 @@ def launch_gradio(
|
|
| 513 |
description="Search the web with the query and summarize the results. Source code: https://github.com/pengfeng/ask.py",
|
| 514 |
)
|
| 515 |
|
| 516 |
-
iface.launch()
|
| 517 |
|
| 518 |
|
| 519 |
@click.command(help="Search web for the query and summarize the results")
|
|
@@ -586,7 +585,13 @@ def search_extract_summarize(
|
|
| 586 |
model_name: str,
|
| 587 |
log_level: str,
|
| 588 |
):
|
| 589 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 590 |
launch_gradio(
|
| 591 |
query=query,
|
| 592 |
date_restrict=date_restrict,
|
|
@@ -596,6 +601,7 @@ def search_extract_summarize(
|
|
| 596 |
url_list_str=_read_url_list(url_list_file),
|
| 597 |
model_name=model_name,
|
| 598 |
log_level=log_level,
|
|
|
|
| 599 |
)
|
| 600 |
else:
|
| 601 |
if query is None:
|
|
|
|
| 410 |
) -> str:
|
| 411 |
logger = get_logger(log_level)
|
| 412 |
|
|
|
|
|
|
|
| 413 |
ask = Ask(logger=logger)
|
| 414 |
|
| 415 |
if url_list_str is None or url_list_str.strip() == "":
|
|
|
|
| 472 |
url_list_str: str,
|
| 473 |
model_name: str,
|
| 474 |
log_level: str,
|
| 475 |
+
share_ui: bool,
|
| 476 |
) -> None:
|
| 477 |
iface = gr.Interface(
|
| 478 |
fn=_run_query,
|
|
|
|
| 512 |
description="Search the web with the query and summarize the results. Source code: https://github.com/pengfeng/ask.py",
|
| 513 |
)
|
| 514 |
|
| 515 |
+
iface.launch(share=share_ui)
|
| 516 |
|
| 517 |
|
| 518 |
@click.command(help="Search web for the query and summarize the results")
|
|
|
|
| 585 |
model_name: str,
|
| 586 |
log_level: str,
|
| 587 |
):
|
| 588 |
+
load_dotenv(dotenv_path=default_env_file, override=False)
|
| 589 |
+
|
| 590 |
+
if web_ui or os.environ.get("RUN_GRADIO_UI", "false").lower() != "false":
|
| 591 |
+
if os.environ.get("SHARE_GRADIO_UI", "false").lower() == "true":
|
| 592 |
+
share_ui = True
|
| 593 |
+
else:
|
| 594 |
+
share_ui = False
|
| 595 |
launch_gradio(
|
| 596 |
query=query,
|
| 597 |
date_restrict=date_restrict,
|
|
|
|
| 601 |
url_list_str=_read_url_list(url_list_file),
|
| 602 |
model_name=model_name,
|
| 603 |
log_level=log_level,
|
| 604 |
+
share_ui=share_ui,
|
| 605 |
)
|
| 606 |
else:
|
| 607 |
if query is None:
|