File size: 4,917 Bytes
92feab2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
from typing import List, Optional
import json

import gradio as gr
import requests

from .utils import html_format_doc
from .retrieval.up_orgs_keyword import organization_card_html
from .retrieval.elastic import reranker, get_query_results
from .retrieval.config import ALL_INDICES
from . import UP_QA_SEARCH_API


def run_search(search_text: str, indices: Optional[List[str]] = None):
    """Query the search backend and render the reranked hits as one HTML string.

    Args:
        search_text: Query string forwarded to ``get_query_results``.
        indices: Optional list of index names forwarded unchanged to
            ``get_query_results`` (``None`` is passed through as-is).

    Returns:
        The ``html_format_doc`` output of every reranked hit, concatenated in
        reranker order and wrapped in a single ``<div>``.
    """
    # Ordered (substring, source label) pairs. The first substring found in a
    # hit's index name decides the label, so the order here is significant.
    # For issuelab, youtube, candid-blog and candid-help only the "-elser"
    # index variants are recognised.
    index_markers = (
        ("news", "news"),
        ("transactions", "transactions"),
        ("organizations", "organizations"),
        ("issuelab-elser", "issuelab"),
        ("youtube-elser", "youtube"),
        ("candid-blog-elser", "candid_blog"),
        ("candid-learning", "candid_learning"),  # TODO fix that
        ("candid-help-elser", "candid_help"),
    )

    hits = get_query_results(search_text, indices=indices)

    rendered = []
    for hit in reranker(hits):
        # Falls back to None when the index name matches none of the markers.
        label = next(
            (name for marker, name in index_markers if marker in hit.index),
            None,
        )
        rendered.append(html_format_doc(doc=hit.source, source=label))

    return f"<div>{''.join(rendered)}</div>"


def _org_from_keyword_doc(doc: dict) -> dict:
    """Map one ``returnedOrgs`` entry onto the dict given to ``organization_card_html``.

    Missing fields fall back to an empty string (or an empty dict for ``seal``
    and ``taxonomy``). ``mission_statement`` is only set when a highlight entry
    whose ``field`` is ``"mission_statement"`` is present; its fragments are
    joined with ``"; "``. If several such entries exist, the last one wins.
    """
    org = {
        "candid_entity_id": doc.get("candidEntityID", ""),
        "main_name": doc.get("orgName", ""),
        "logo": doc.get("logo", ""),
        "seal": doc.get("seal", {}),
        "city": doc.get("city", ""),
        "admin1": doc.get("admin1", ""),
        "country_name": doc.get("countryName", ""),
        "taxonomy": doc.get("taxonomy", {}),
    }

    # Highlight entries are read as defensively as the fields above: an entry
    # without "field" or "highlights" is tolerated instead of raising KeyError
    # and discarding every keyword result.
    for highlight in doc.get("highlights") or []:
        if highlight.get("field") == "mission_statement":
            fragments = highlight.get("highlights") or []
            org["mission_statement"] = "; ".join(fragments)

    return org


def run_ks(search_text: str):
    """Run the keyword search and the semantic search for the same query.

    The keyword side POSTs ``{"keyword": search_text, "rowCount": 10}`` to
    ``UP_QA_SEARCH_API["API_URL"]`` and renders each entry of the response's
    ``returnedOrgs`` list with ``organization_card_html``. The semantic side
    delegates to ``run_search`` with the default indices.

    Args:
        search_text: The query, used for both searches.

    Returns:
        A ``(keyword_html, semantic_html)`` tuple of HTML strings. The keyword
        string is an empty ``<div>`` when the response has no ``returnedOrgs``.

    Raises:
        requests.RequestException: If the HTTP request itself fails.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    json_body = {"keyword": search_text, "rowCount": 10}

    response = requests.post(
        url=UP_QA_SEARCH_API["API_URL"],
        json=json_body,
        headers={
            "accept": "application/json",
            "content-type": "application/json",
            "x-api-key": UP_QA_SEARCH_API["API_KEY"]
        },
        timeout=(5 * 60)
    )

    # NOTE(review): the HTTP status is not checked, so an error response with a
    # JSON body simply yields no keyword cards. Confirm whether that is intended.
    r_json = json.loads(response.text)

    # The second argument (250) is passed through as in the original call;
    # presumably a display size or length limit. Confirm against
    # organization_card_html.
    output_k = [
        organization_card_html(_org_from_keyword_doc(doc), 250)
        for doc in r_json.get("returnedOrgs") or []
    ]

    # Getting semantic results
    output_s = run_search(search_text=search_text)

    return f"<div>{''.join(output_k)}</div>", output_s


def build_search_tab() -> gr.Blocks:
    """Assemble the semantic-search tab.

    The layout is a heading, a query text box, a collapsed "Advanced settings"
    accordion holding a checkbox group built from ``ALL_INDICES`` (all entries
    selected by default), a search button, and an HTML output area. Clicking
    the button calls ``run_search`` with the query text and the selected
    indices and writes the result into the HTML area.

    Returns:
        The ``gr.Blocks`` instance containing the tab.
    """
    heading = (
        "<h1>Alpha demo: Semantic search</h1>"
        "Search and ask questions of Candid's data together with casual language"
    )

    with gr.Blocks(theme=gr.themes.Soft(), title="Semantic search") as search_ui:
        gr.Markdown(heading)

        query_box = gr.Text(placeholder="Search", show_label=False)

        with gr.Accordion(label="Advanced settings", open=False):
            # Two separate lists on purpose: choices and the default selection
            # must not share one list object.
            source_picker = gr.CheckboxGroup(
                choices=list(ALL_INDICES),
                value=list(ALL_INDICES),
                label="Sources to include",
                interactive=True,
            )

        search_button = gr.Button("Search")
        results_panel = gr.HTML()

        # pylint: disable=no-member
        search_button.click(
            fn=run_search,
            inputs=[query_box, source_picker],
            outputs=[results_panel],
            api_name=False,
            queue=True,
        )

    return search_ui


def build_ks_tab() -> gr.Blocks:
    """Assemble the keyword-versus-semantic comparison tab.

    The layout is a heading, a one-line text area for the query, a button, and
    a row with two columns: keyword results on the left and semantic results
    on the right, each an HTML area under its own sub-heading. Clicking the
    button calls ``run_ks`` with the query and fills both HTML areas from the
    two values it returns.

    Returns:
        The ``gr.Blocks`` instance containing the tab.
    """
    heading = (
        "<h1>Alpha demo: Keyword versus Semantic search</h1>"
        "Compare current search results versus semantic search results"
    )

    with gr.Blocks(theme=gr.themes.Soft(), title="Semantic search") as compare_ui:
        gr.Markdown(heading)
        query_box = gr.TextArea(placeholder="Search", show_label=False, lines=1)
        compare_button = gr.Button("Search Unified Platform organizations")

        with gr.Row():
            with gr.Column():
                gr.Markdown("<h2>Keyword results</h2>")
                keyword_panel = gr.HTML()
            with gr.Column():
                gr.Markdown("<h2>Semantic results</h2>")
                semantic_panel = gr.HTML()

        # pylint: disable=no-member
        compare_button.click(
            fn=run_ks,
            inputs=[query_box],
            outputs=[keyword_panel, semantic_panel],
            api_name=False,
            queue=True,
        )

    return compare_ui