Spaces:
Running
Running
File size: 4,917 Bytes
92feab2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
from typing import List, Optional
import json
import gradio as gr
import requests
from .utils import html_format_doc
from .retrieval.up_orgs_keyword import organization_card_html
from .retrieval.elastic import reranker, get_query_results
from .retrieval.config import ALL_INDICES
from . import UP_QA_SEARCH_API
# Ordered (index-name fragment, source label) pairs. The first fragment found
# in a hit's index name decides the label, so the order here is significant.
# For issuelab, youtube and candid-blog only the "-elser" index variants are
# matched.
_INDEX_FRAGMENT_TO_SOURCE = (
    ("news", "news"),
    ("transactions", "transactions"),
    ("organizations", "organizations"),
    ("issuelab-elser", "issuelab"),
    ("youtube-elser", "youtube"),
    ("candid-blog-elser", "candid_blog"),
    ("candid-learning", "candid_learning"),  # TODO fix that
    ("candid-help-elser", "candid_help"),
)


def run_search(search_text: str, indices: Optional[List[str]] = None):
    """Run a search and render the reranked hits as one HTML fragment.

    Args:
        search_text: The user's query, passed to ``get_query_results``.
        indices: Optional list of index names forwarded to
            ``get_query_results``; ``None`` is forwarded unchanged.

    Returns:
        A string of the form ``<div>...</div>`` containing the output of
        ``html_format_doc`` for every reranked hit, concatenated in order.
    """
    hits = get_query_results(search_text, indices=indices)

    cards = []
    for hit in reranker(hits):
        index_name = hit.index
        # Label stays None when no known fragment occurs in the index name.
        source_label = next(
            (
                label
                for fragment, label in _INDEX_FRAGMENT_TO_SOURCE
                if fragment in index_name
            ),
            None,
        )
        cards.append(html_format_doc(doc=hit.source, source=source_label))

    return f"<div>{''.join(cards)}</div>"
def _org_card_fields(doc):
    """Map one ``returnedOrgs`` entry onto the dict given to ``organization_card_html``."""
    org = {
        "candid_entity_id": doc.get("candidEntityID", ""),
        "main_name": doc.get("orgName", ""),
        "logo": doc.get("logo", ""),
        "seal": doc.get("seal", {}),
        "city": doc.get("city", ""),
        "admin1": doc.get("admin1", ""),
        "country_name": doc.get("countryName", ""),
        "taxonomy": doc.get("taxonomy", {}),
    }
    # "mission_statement" is only set when a matching highlight exists; if
    # several match, the last one wins.
    for highlight in doc.get("highlights", []) or []:
        if highlight["field"] == "mission_statement":
            org["mission_statement"] = "; ".join(highlight["highlights"])
    return org


def run_ks(search_text: str):
    """Fetch keyword results from the search API and pair them with semantic results.

    Posts ``search_text`` to the URL configured in ``UP_QA_SEARCH_API``, renders
    each entry of the response's ``returnedOrgs`` list with
    ``organization_card_html``, then runs ``run_search`` on the same text.

    Args:
        search_text: The user's query.

    Returns:
        A 2-tuple ``(keyword_html, semantic_html)``: the keyword cards wrapped
        in a ``<div>``, and whatever ``run_search`` returns.
    """
    request_body = {"keyword": search_text, "rowCount": 10}
    response = requests.post(
        url=UP_QA_SEARCH_API["API_URL"],
        json=request_body,
        headers={
            "accept": "application/json",
            "content-type": "application/json",
            "x-api-key": UP_QA_SEARCH_API["API_KEY"],
        },
        timeout=5 * 60,  # seconds
    )
    payload = json.loads(response.text)

    keyword_cards = []
    returned_orgs = payload.get("returnedOrgs", None)
    if returned_orgs is not None:
        keyword_cards = [
            organization_card_html(_org_card_fields(doc), 250)
            for doc in returned_orgs
        ]

    # Getting semantic results
    semantic_html = run_search(search_text=search_text)

    return f"<div>{''.join(keyword_cards)}</div>", semantic_html
def build_search_tab() -> gr.Blocks:
    """Assemble the Gradio page for the semantic-search demo.

    The page has a query text box, an "Advanced settings" accordion (created
    closed) with a checkbox group listing every entry of ``ALL_INDICES`` (all
    selected initially), a "Search" button and an HTML output. Clicking the
    button calls ``run_search`` with the query and the selected indices and
    writes the result into the HTML output.

    Returns:
        The ``gr.Blocks`` container holding the page.
    """
    page_intro = (
        "<h1>Alpha demo: Semantic search</h1>"
        "Search and ask questions of Candid's data together with casual language"
    )

    with gr.Blocks(theme=gr.themes.Soft(), title="Semantic search") as demo:
        gr.Markdown(page_intro)
        query_box = gr.Text(placeholder="Search", show_label=False)

        with gr.Accordion(label="Advanced settings", open=False):
            index_picker = gr.CheckboxGroup(
                choices=list(ALL_INDICES),
                value=list(ALL_INDICES),
                label="Sources to include",
                interactive=True,
            )

        search_button = gr.Button("Search")
        results_html = gr.HTML()

        # pylint: disable=no-member
        search_button.click(
            fn=run_search,
            inputs=[query_box, index_picker],
            outputs=[results_html],
            api_name=False,
            queue=True,
        )

    return demo
def build_ks_tab() -> gr.Blocks:
    """Assemble the Gradio page comparing keyword and semantic results.

    The page has a one-line query text area, a search button, and a row with
    two columns: one HTML output under a "Keyword results" heading and one
    under a "Semantic results" heading. Clicking the button calls ``run_ks``
    with the query and writes its two return values into those outputs, in
    that order.

    Returns:
        The ``gr.Blocks`` container holding the page.
    """
    page_intro = (
        "<h1>Alpha demo: Keyword versus Semantic search</h1>"
        "Compare current search results versus semantic search results"
    )

    with gr.Blocks(theme=gr.themes.Soft(), title="Semantic search") as demo:
        gr.Markdown(page_intro)
        query_box = gr.TextArea(placeholder="Search", show_label=False, lines=1)
        ask_button = gr.Button("Search Unified Platform organizations")

        with gr.Row():
            with gr.Column():
                gr.Markdown("<h2>Keyword results</h2>")
                keyword_feed = gr.HTML()
            with gr.Column():
                gr.Markdown("<h2>Semantic results</h2>")
                semantic_feed = gr.HTML()

        # pylint: disable=no-member
        ask_button.click(
            fn=run_ks,
            inputs=[query_box],
            outputs=[keyword_feed, semantic_feed],
            api_name=False,
            queue=True,
        )

    return demo
|