Spaces:
Running
Running
| from typing import List, Optional | |
| import json | |
| import gradio as gr | |
| import requests | |
| from .utils import html_format_doc | |
| from .retrieval.up_orgs_keyword import organization_card_html | |
| from .retrieval.elastic import reranker, get_query_results | |
| from .retrieval.config import ALL_INDICES | |
| from . import UP_QA_SEARCH_API | |
def run_search(search_text: str, indices: Optional[List[str]] = None):
    """Run the semantic search and render the reranked hits as one HTML block.

    Args:
        search_text: Free-text query forwarded to ``get_query_results``.
        indices: Optional list of index names, forwarded unchanged to
            ``get_query_results``.

    Returns:
        A string holding every formatted hit concatenated inside one ``<div>``.
    """
    # Ordered (substring of the index name, source label) pairs. The first
    # substring found in ``hit.index`` decides the label, so order matters.
    # The bare "issuelab", "youtube" and "candid-blog" markers are disabled;
    # only their "-elser" variants are recognised.
    index_markers = (
        ("news", "news"),
        ("transactions", "transactions"),
        ("organizations", "organizations"),
        ("issuelab-elser", "issuelab"),
        ("youtube-elser", "youtube"),
        ("candid-blog-elser", "candid_blog"),
        ("candid-learning", "candid_learning"),  # TODO fix that
        ("candid-help-elser", "candid_help"),
    )

    hits = get_query_results(search_text, indices=indices)
    cards = []
    for hit in reranker(hits):
        # Falls back to None when no marker matches the hit's index name.
        label = next(
            (name for marker, name in index_markers if marker in hit.index),
            None,
        )
        cards.append(html_format_doc(doc=hit.source, source=label))
    return f"<div>{''.join(cards)}</div>"
def run_ks(search_text: str):
    """Fetch keyword and semantic search results for the same query.

    The keyword results come from a POST to the endpoint configured in
    ``UP_QA_SEARCH_API``; the semantic results come from ``run_search``.

    Args:
        search_text: The query typed by the user.

    Returns:
        A ``(keyword_html, semantic_html)`` tuple of HTML strings.

    Raises:
        requests.HTTPError: If the keyword API answers with a 4xx/5xx status.
    """
    response = requests.post(
        url=UP_QA_SEARCH_API["API_URL"],
        json={"keyword": search_text, "rowCount": 10},
        headers={
            "accept": "application/json",
            "content-type": "application/json",
            "x-api-key": UP_QA_SEARCH_API["API_KEY"],
        },
        timeout=(5 * 60),
    )
    # Surface HTTP failures explicitly: an error reply with a JSON body would
    # otherwise be parsed and silently shown as "no keyword results".
    response.raise_for_status()
    r_json = json.loads(response.text)

    output_k = []
    # A missing or null "returnedOrgs" is treated as an empty result set.
    for doc in r_json.get("returnedOrgs") or []:
        org = {
            "candid_entity_id": doc.get("candidEntityID", ""),
            "main_name": doc.get("orgName", ""),
            "logo": doc.get("logo", ""),
            "seal": doc.get("seal", {}),
            "city": doc.get("city", ""),
            "admin1": doc.get("admin1", ""),
            "country_name": doc.get("countryName", ""),
            "taxonomy": doc.get("taxonomy", {}),
        }
        # Only the mission-statement highlight is used. Entries are read
        # with .get so that a malformed highlight cannot abort the request.
        for h in doc.get("highlights") or []:
            if h.get("field") == "mission_statement":
                org["mission_statement"] = "; ".join(h.get("highlights") or [])
        output_k.append(organization_card_html(org, 250))

    # Getting semantic results
    output_s = run_search(search_text=search_text)
    return f"<div>{''.join(output_k)}</div>", output_s
def build_search_tab() -> gr.Blocks:
    """Assemble the semantic-search tab.

    The tab holds a query box, a collapsed "Advanced settings" accordion with
    a checkbox group of sources, a search button and an HTML results area.
    Clicking the button calls ``run_search`` with the query and the selected
    sources and writes its return value into the results area.

    Returns:
        The constructed ``gr.Blocks`` instance.
    """
    heading = (
        "<h1>Alpha demo: Semantic search</h1>"
        "Search and ask questions of Candid's data together with casual language"
    )

    with gr.Blocks(theme=gr.themes.Soft(), title="Semantic search") as tab:
        gr.Markdown(heading)
        query_box = gr.Text(placeholder="Search", show_label=False)

        with gr.Accordion(label="Advanced settings", open=False):
            # Every known index is offered and pre-selected.
            source_picker = gr.CheckboxGroup(
                choices=list(ALL_INDICES),
                value=list(ALL_INDICES),
                label="Sources to include",
                interactive=True,
            )

        search_button = gr.Button("Search")
        results_panel = gr.HTML()

        # pylint: disable=no-member
        search_button.click(
            fn=run_search,
            inputs=[query_box, source_picker],
            outputs=[results_panel],
            api_name=False,
            queue=True,
        )
    return tab
def build_ks_tab() -> gr.Blocks:
    """Assemble the keyword-versus-semantic comparison tab.

    The tab holds a query box and a button above a row of two columns, one
    for keyword results and one for semantic results. Clicking the button
    calls ``run_ks`` with the query and writes its two return values into the
    keyword and semantic HTML areas, in that order.

    Returns:
        The constructed ``gr.Blocks`` instance.
    """
    heading = (
        "<h1>Alpha demo: Keyword versus Semantic search</h1>"
        "Compare current search results versus semantic search results"
    )

    with gr.Blocks(theme=gr.themes.Soft(), title="Semantic search") as tab:
        gr.Markdown(heading)
        query_box = gr.TextArea(placeholder="Search", show_label=False, lines=1)
        ask_button = gr.Button("Search Unified Platform organizations")

        # Side-by-side result panes.
        with gr.Row():
            with gr.Column():
                gr.Markdown("<h2>Keyword results</h2>")
                keyword_panel = gr.HTML()
            with gr.Column():
                gr.Markdown("<h2>Semantic results</h2>")
                semantic_panel = gr.HTML()

        # pylint: disable=no-member
        ask_button.click(
            fn=run_ks,
            inputs=[query_box],
            outputs=[keyword_panel, semantic_panel],
            api_name=False,
            queue=True,
        )
    return tab