File size: 5,195 Bytes
1327f34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# Copyright 2025 The Scenic Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for Extract WikiGraph."""
from absl import logging
from absl.testing import absltest
from scenic.projects.knowledge_visual_language.data.wikidata import data_util


class ClipModelsTest(absltest.TestCase):
  """Single Test based on a dummy input."""

  def test_graph_extraction(self):
    """Tests whether it could correctly extract 2-hop graph."""
    rel_list = [
        ["Q1066517", "r1", "Qdummy"],
        ["Q35581", "r2", "Qdummy"],
        ["Q2339039", "r2", "Qdummy"],
        ["Q753878", "r2", "Qdummy"],
    ]
    logging.info("Construct KG Graph from edge list.")
    kg_graph = data_util.construct_kg_graph(rel_list)
    logging.info("Extract 2-hop subgraphs within wiki-pages.")
    demo_data = self.demo_input()
    for d in demo_data:
      in_context_ents = {ent: True for ent in d["link"] if ent != "UNK"}
      graph = data_util.extract_2hop_graph(in_context_ents, kg_graph)
      print(graph)
      logging.info(len(graph))

  def demo_input(self):
    d_list = [{
        "txt": "yukigassen\n is a snowball fighting-competition from "
               "japan. today there are annual tournaments in "
               "s\u014dbetsu, hokkaid\u014d in japan, kemij\u00e4rvi in"
               " finland, vard\u00f8 in norway, murmansk in russia, "
               "mount buller, victoria in australia, lule\u00e5 in "
               "sweden, anchorage in alaska, aparan in armenia, jasper,"
               " alberta and saskatoon, saskatchewan in canada.\nthe "
               "word consists of the japanese words yuki (snow) and "
               "kassen (battle) with rendaku. hence \"yukigassen\" "
               "means snow battle, but is a common term for 'snowball "
               "fight' in japanese.\n......",
        "link": {
            "Q1066517": [[17, 46], [483, 497]],
            "Q35581": [[106, 114]],
            "Q744704": [[125, 134]],
            "Q33": [[138, 145]],
            "Q108983": [[147, 152], [1720, 1725], [1907, 1912], [2089, 2094]],
            "Q1763": [[164, 172]],
            "Q984117": [[184, 206]],
            "Q26268": [[221, 226], [1534, 1539], [1015, 1020], [1175, 1180],
                       [1363, 1368]],
            "Q39450": [[238, 247]],
            "Q797": [[251, 257]],
            "Q39618": [[259, 265]],
            "Q399": [[269, 276]],
            "Q999429": [[278, 293]],
            "Q10566": [[298, 321]],
            "Q5287": [[358, 366]],
            "Q3943791": [[379, 383], [444, 448], [1627, 1631]],
            "Q178561": [[397, 403]],
            "Q1192464": [[410, 417]],
            "Q1035213": [[744, 760]],
            "Q131647": [[964, 969], [1118, 1123], [1305, 1310], [1477, 1482],
                        [1669, 1674], [1865, 1870], [2047, 2052], [925, 930]],
            "Q406039": [[1029, 1039], [1189, 1199], [1377, 1387], [1548, 1558],
                        [1734, 1744], [1922, 1926], [2104, 2108]],
            "Q847956": [[1052, 1064], [1211, 1223], [1404, 1416], [1584, 1596],
                        [1766, 1778], [1951, 1957], [2146, 2152]],
            "Q873364": [[1073, 1085], [1243, 1255], [1429, 1441], [1608, 1620],
                        [1808, 1820], [1975, 1981], [2175, 2181]],
            "Q44853": [[1745, 1753]],
            "Q218082": [[0, 10], [426, 436], [522, 532], [704, 714], [831,
                                                                      841]],
            "UNK": [[97, 104], [698, 725], [1040, 1051], [1200, 1210],
                    [1442, 1452], [1597, 1607], [1224, 1242], [1256, 1280],
                    [1559, 1583], [1388, 1403], [1621, 1640], [1779, 1795],
                    [1821, 1826], [1959, 1964], [1928, 1940], [1983, 2006]]
        },
        "entity": "Yukigassen"
    }, {
        "txt":
            "harry hyams\nharry john hyams (2 january 1928 \u2013 19 december "
            "2015) was a british millionaire who initially made his money as a"
            " speculative property (real estate) developer. he was best known "
            "as the developer of the centre point office building in "
            "london.\n......",
        "link": {
            "Q2339039": [[213, 225], [961, 973]],
            "Q149787": [[283, 289]],
            "Q19186": [[291, 300]],
            "Q753878": [[633, 642]],
            "Q743535": [[1139, 1146]],
            "Q43747844": [[1301, 1310]],
            "Q7290153": [[1388, 1402], [1566, 1580]],
            "Q7743416": [[1584, 1600]],
            "Q5669913": [[0, 11], [12, 28]]
        },
        "entity": "Harry Hyams"
    }]
    return d_list


if __name__ == "__main__":
  absltest.main()